In [2]:
import pandas as pd
import glob

In [8]:
# Collect the per-story CSV exports.
# glob.glob() returns matches in arbitrary, filesystem-dependent order; the
# order of this list decides the row order of whatever is built from it, so
# sort it to keep the notebook reproducible across machines and re-runs.
# NOTE(review): the sort is lexicographic ("Story10" sorts before "Story2");
# confirm that matches the intended story order if file names are not zero-padded.
csv_files = sorted(glob.glob("./human_study/data/preprocessed/Story*"))

In [53]:
# Exact header text of the participant-ID question as it appears in the CSVs;
# it is renamed to the short name "participant_id" on load.
p_id = "Participant ID\n(This number is assigned to you by the study organizers. Please request for one if you have not received one. Thanks!)"

# One frame per input file, each with the ID column renamed and its rows
# ordered by participant.
dfs = [
    pd.read_csv(csv_path)
    .rename(columns={p_id: "participant_id"})
    .sort_values(by="participant_id")
    for csv_path in csv_files
]

# Stack the per-file frames vertically (original row labels are kept as-is).
df = pd.concat(dfs)

In [66]:
# Name of the participant identifier column used as the id variable when melting.
participant_id_col = "participant_id"

# Marker text -> short component name.
#   key:   substring looked up in the lower-cased column headers
#   value: prefix used for the generated `<name>_col` / `<name>_score` columns
_marker_to_component = {
    "**authentic**": "authenticity",
    "**empathy**": "empathy",
    "**engaging**": "engagement",
    "**provoke emotion**": "emotion_provoking",
    "**narratively complex**": "narrative_complexity",
}

# Ordered list of (marker, component_name) tuples consumed by the melt loop.
mappings = list(_marker_to_component.items())

In [67]:
# One long-format frame per component, in the order given by `mappings`.
results = []

for marker, component in mappings:
    # Every column whose lower-cased header contains this component's marker.
    matching_cols = [header for header in df.columns if marker in header.lower()]

    # Keep only those columns plus the participant ID, then reshape wide -> long:
    # each matching column is unpivoted into rows, with the source header stored
    # in `<component>_col` and the cell value in `<component>_score`.
    long_df = df[matching_cols + [participant_id_col]].melt(
        id_vars=[participant_id_col],
        value_vars=matching_cols,
        var_name=f"{component}_col",
        value_name=f"{component}_score",
    )

    results.append(long_df)


In [76]:
keep_cols = ["participant_id", "authenticity_score", "empathy_score", "engagement_score", "emotion_provoking_score", "narrative_complexity_score"]

# Number of consecutive rows that share one story id.
# NOTE(review): this is the previously hard-coded `5`; presumably the number of
# rows each story contributes after melting - confirm against the data.
ROWS_PER_STORY = 5

# Place the per-component long frames side by side (aligned on their row
# labels) and keep the participant ID and score columns. "participant_id"
# occurs once per component frame, so the selection contains duplicates.
# NOTE(review): this relies on every frame in `results` having the same number
# of rows in the same order; frames of different length would be NaN-padded.
combined = pd.concat(results, axis=1)[keep_cols]

# Keep the first occurrence of each column name, then derive story_id from the
# row position. `.assign` builds a new frame, which avoids setting a column on
# a `.loc` slice (the SettingWithCopyWarning pattern).
df = (
    combined.loc[:, ~combined.columns.duplicated()]
    .assign(story_id=lambda frame: frame.index // ROWS_PER_STORY)
)

In [81]:
# Column order of the exported annotations table.
final_cols = ["participant_id", "story_id", "authenticity_score", "empathy_score", "engagement_score", "emotion_provoking_score", "narrative_complexity_score"]

# Select the export columns in that order and write them out as CSV.
# The frame's row index is written as well (to_csv default).
annotations_out = df[final_cols]
annotations_out.to_csv("./human_study/data/preprocessed/annotations.csv")