In [1]:
import pandas as pd
# Opt in to pandas' future behaviour in which methods such as fillna/replace
# no longer silently downcast object-dtype results.
pd.set_option('future.no_silent_downcasting', True)

In [2]:
# Load the survey export; the first line of the file supplies the column names
file_path = 'survey_data.csv'
df = pd.read_csv(filepath_or_buffer=file_path, header=0)

In [3]:
# Output names given, in order, to the values copied out of each video's column block
ratings_cols = [
    'joy',
    'sadness',
    'fear',
    'anger',
    'disgust',
    'surprise',
    'other',
    'other_text',
    'pleasure',
    'arousal',
    'dominance',
    'appraisal',
]

In [4]:
# Width, in columns, of the block that each video occupies in the survey export
cols_per_video = 16
# Number of videos.
# NOTE(review): not referenced by any other visible cell; the processing loop scans
# 512 video blocks, so this presumably means videos shown per participant -- confirm.
num_videos = 10

In [5]:
# Define a function to extract the video identifier from the column index
def extract_video_id(col_index, first_col=45, block_width=None):
    """Return the 1-based video number whose column block contains ``col_index``.

    Args:
        col_index: Positional column index into a survey row.
        first_col: Position of the first column of video 1. Defaults to 45,
            the offset used by the rest of this notebook.
        block_width: Number of consecutive columns per video. ``None`` (the
            default) falls back to the notebook-level ``cols_per_video``.

    Returns:
        int: 1 for positions inside the first block, 2 for the next block, and
        so on. Positions before ``first_col`` yield values below 1.
    """
    if block_width is None:
        # Keep the original behaviour: read the width from the notebook setting.
        block_width = cols_per_video
    return (col_index - first_col) // block_width + 1

In [6]:
# Accumulator for the output records (one dict per processed row)
processed_rows = list()

In [7]:
# Row iterator over the participants: everything from the third row of df onwards.
# iterrows() hands back a one-shot generator of (index, row) pairs, so it can be
# looped over only once.
rows = df.iloc[2:].iterrows()

In [8]:
# Rebuild the output from scratch so that re-running this cell on its own neither
# duplicates rows nor depends on an already-consumed row iterator.
processed_rows = []

# Positional layout of the survey export used by this loop.
first_video_col = 45    # position of the first column of video block 1
end_video_col = 8237    # one past the last column of the final video block
skipped_cols = 4        # leading columns of each block that are not copied to the output

# The first two rows of df are skipped (presumably extra header rows of the
# export -- TODO confirm against the raw file).
for _, row in df.iloc[2:, :].iterrows():
    # Blocks are visited in ascending column order, so the records appended for
    # one participant are already sorted by video_id.
    for block_start in range(first_video_col, end_video_col, cols_per_video):
        block_end = block_start + cols_per_video

        # Skip video blocks in which every cell is missing for this participant.
        if len(row.iloc[block_start:block_end].dropna()) == 0:
            continue

        # One output record per (participant, video) pair.
        processed_row = {
            'prolific_id': row['PROLIFIC_PID'],
            'video_id': extract_video_id(block_start),
        }
        # Pair the rating names with the block's values, left to right, leaving
        # out the first `skipped_cols` columns of the block.
        ratings_vals = row.iloc[range(block_start + skipped_cols, block_end)]
        processed_row.update(zip(ratings_cols, ratings_vals))
        processed_rows.append(processed_row)

In [9]:
# Create a new DataFrame from the processed data
processed_df = pd.DataFrame(processed_rows)

# Missing answers become 0 in every column, including the free-text ones.
# NOTE(review): confirm that 0 is the intended placeholder for the text columns.
processed_df = processed_df.fillna(0)

# Rating columns that must end up numeric; all other columns are left as they are.
columns_to_cast = ['joy', 'sadness', 'fear', 'anger', 'disgust', 'surprise', 'other', 'pleasure', 'arousal', 'dominance']

# Translate the verbal scale labels to scores in the rating columns only, so that
# a free-text answer consisting of exactly e.g. "High" is not overwritten with a number.
processed_df[columns_to_cast] = (
    processed_df[columns_to_cast]
    .replace(['Very Low', 'Low', 'Average', 'High', 'Very High'], [1, 2, 3, 4, 5])
    .astype(float)
)

# Save the processed data to a new CSV file
processed_df.to_csv('processed_data.csv', index=False)

In [10]:
# Show the dtype of every column of processed_df
processed_df.dtypes

prolific_id     object
video_id         int64
joy            float64
sadness        float64
fear           float64
anger          float64
disgust        float64
surprise       float64
other          float64
other_text      object
pleasure       float64
arousal        float64
dominance      float64
appraisal       object
dtype: object

In [11]:
# Display the processed frame.
# NOTE(review): the rendered output contains participant identifiers (prolific_id);
# consider clearing outputs before sharing this notebook.
processed_df

Unnamed: 0,prolific_id,video_id,joy,sadness,fear,anger,disgust,surprise,other,other_text,pleasure,arousal,dominance,appraisal
0,5c791e24266f210012af518d,1,0.0,1.0,1.0,0.0,0.0,2.0,0.0,0,4.0,3.0,3.0,i honestly dont know about this one
1,5c791e24266f210012af518d,6,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0,1.0,1.0,2.0,it seemed like something had made the robot sa...
2,5c791e24266f210012af518d,59,0.0,5.0,1.0,0.0,0.0,0.0,0.0,0,1.0,1.0,1.0,it seems to be sad and maybe a little confused...
3,5c791e24266f210012af518d,61,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0,4.0,3.0,3.0,it seemed to be kinda all over the place
4,5c791e24266f210012af518d,219,0.0,0.0,4.0,0.0,0.0,1.0,0.0,0,2.0,7.0,4.0,"it seems like something made the robot scared,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,60089c477baa6b3fba6dbe80,267,1.0,1.0,3.0,3.0,0.0,3.0,0.0,0,3.0,3.0,5.0,The robot was a bit more irritable and fearful...
96,60089c477baa6b3fba6dbe80,322,1.0,1.0,4.0,1.0,1.0,3.0,0.0,0,1.0,1.0,1.0,The robot looked depressed and was just drivin...
97,60089c477baa6b3fba6dbe80,334,3.0,1.0,1.0,1.0,0.0,3.0,4.0,Happy,7.0,7.0,5.0,The robot seemed confident but whimsical and w...
98,60089c477baa6b3fba6dbe80,383,3.0,1.0,1.0,1.0,1.0,1.0,4.0,Confident,7.0,7.0,6.0,The robot seemed calm and in a good mood.


In [12]:
# Group the processed records by video
grouped_df = processed_df.groupby(by=['video_id'])

In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for each video_id group.
# In the rows rendered below every group has count 1, which is why std is NaN.
grouped_df.describe()

Unnamed: 0_level_0,joy,joy,joy,joy,joy,joy,joy,joy,sadness,sadness,...,arousal,arousal,dominance,dominance,dominance,dominance,dominance,dominance,dominance,dominance
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
video_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,3.0,3.0,1.0,3.0,,3.0,3.0,3.0,3.0,3.0
2,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,2.0,...,6.0,6.0,1.0,4.0,,4.0,4.0,4.0,4.0,4.0
4,1.0,2.0,,2.0,2.0,2.0,2.0,2.0,1.0,1.0,...,9.0,9.0,1.0,5.0,,5.0,5.0,5.0,5.0,5.0
6,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,4.0,...,1.0,1.0,1.0,2.0,,2.0,2.0,2.0,2.0,2.0
14,1.0,5.0,,5.0,5.0,5.0,5.0,5.0,1.0,1.0,...,7.0,7.0,1.0,6.0,,6.0,6.0,6.0,6.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
470,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,4.0,4.0,1.0,5.0,,5.0,5.0,5.0,5.0,5.0
472,1.0,2.0,,2.0,2.0,2.0,2.0,2.0,1.0,0.0,...,7.0,7.0,1.0,4.0,,4.0,4.0,4.0,4.0,4.0
474,1.0,4.0,,4.0,4.0,4.0,4.0,4.0,1.0,2.0,...,3.0,3.0,1.0,2.0,,2.0,2.0,2.0,2.0,2.0
475,1.0,3.0,,3.0,3.0,3.0,3.0,3.0,1.0,1.0,...,3.0,3.0,1.0,3.0,,3.0,3.0,3.0,3.0,3.0


In [14]:
# Distinct video ids present in the processed data, in ascending order
sorted(processed_df['video_id'].unique())

[1,
 2,
 4,
 6,
 14,
 15,
 16,
 25,
 30,
 35,
 42,
 45,
 46,
 49,
 59,
 61,
 71,
 75,
 76,
 79,
 80,
 94,
 98,
 100,
 108,
 113,
 115,
 118,
 122,
 128,
 131,
 133,
 148,
 152,
 161,
 165,
 167,
 176,
 180,
 183,
 184,
 192,
 196,
 202,
 207,
 211,
 213,
 216,
 217,
 219,
 224,
 239,
 256,
 258,
 263,
 267,
 274,
 304,
 308,
 314,
 317,
 319,
 322,
 323,
 330,
 334,
 341,
 343,
 344,
 346,
 348,
 353,
 362,
 367,
 369,
 370,
 374,
 376,
 378,
 379,
 382,
 383,
 388,
 392,
 407,
 409,
 423,
 424,
 425,
 428,
 458,
 464,
 465,
 467,
 469,
 470,
 472,
 474,
 475,
 512]

In [15]:
# Distinct participant ids present in the processed data, sorted
sorted(processed_df['prolific_id'].unique())

['5a631a73b9e3b50001a6ebd8',
 '5b09da13641b1200010eab05',
 '5c791e24266f210012af518d',
 '5e58436ebdccf5057ddd9190',
 '5eebc1fd5feed239a73d693d',
 '60089c477baa6b3fba6dbe80',
 '6089aa8120d7418a70f3eba7',
 '63fc7ac4c0ba10b71a25a63d',
 '648ddf3aeff70c1f26ff2652',
 '656cbb93ba4ac7120453d910']

In [16]:
# Show the processed rows of a single participant, selected by prolific_id
processed_df[processed_df['prolific_id'] == '656cbb93ba4ac7120453d910']

Unnamed: 0,prolific_id,video_id,joy,sadness,fear,anger,disgust,surprise,other,other_text,pleasure,arousal,dominance,appraisal
60,656cbb93ba4ac7120453d910,4,2.0,1.0,1.0,0.0,0.0,3.0,4.0,Confusion,7.0,9.0,5.0,The robot seemed confused as it looked around ...
61,656cbb93ba4ac7120453d910,45,5.0,0.0,0.0,0.0,0.0,4.0,5.0,Curiosity,9.0,9.0,6.0,The robot seemed curious about its surrounding...
62,656cbb93ba4ac7120453d910,46,3.0,0.0,0.0,0.0,0.0,1.0,5.0,Calm,8.0,9.0,7.0,"The robot seemed very calm and moving slowly, ..."
63,656cbb93ba4ac7120453d910,71,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0,5.0,7.0,7.0,The robot seemed somewhat disturbed as it beep...
64,656cbb93ba4ac7120453d910,79,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0,9.0,7.0,5.0,The robot beeped very happy and moved around s...
65,656cbb93ba4ac7120453d910,319,1.0,2.0,4.0,2.0,1.0,0.0,0.0,0,3.0,7.0,5.0,"The robot had red lights and seemed upset, as ..."
66,656cbb93ba4ac7120453d910,341,0.0,1.0,4.0,0.0,1.0,2.0,5.0,Confusion,2.0,9.0,5.0,The robot was very confused and was for some r...
67,656cbb93ba4ac7120453d910,362,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0,7.0,7.0,5.0,The robot moved slowly around but overall seem...
68,656cbb93ba4ac7120453d910,367,0.0,2.0,1.0,0.0,1.0,2.0,5.0,Lost,2.0,7.0,4.0,The robot seemed very lost and unsure on what ...
69,656cbb93ba4ac7120453d910,425,3.0,1.0,0.0,0.0,0.0,3.0,4.0,Confusion,5.0,6.0,5.0,"The robot moved around, seeming somewhat confu..."
