In [1]:
import pandas as pd

In [2]:
# Location of the raw survey export, relative to this notebook
file_path = '../Data/Raw/survey_data.csv'
# Load it into a DataFrame, taking the column names from the first line of the file
df = pd.read_csv(filepath_or_buffer=file_path, header=0)

In [3]:
# Output names given, in order, to the rating values extracted for each video
ratings_cols = [
    'joy', 'sadness', 'fear', 'anger', 'disgust', 'surprise',
    'other', 'other_text',
    'pleasure', 'arousal', 'dominance',
    'appraisal',
]

In [4]:
# Layout settings of the survey export.
# num_videos     -- NOTE(review): not referenced by the cells visible in this notebook;
#                   presumably the number of videos shown to each participant -- confirm.
# cols_per_video -- width, in columns, of the block belonging to one video.
num_videos, cols_per_video = 10, 16

In [5]:
# Define a function to extract the video identifier from the column index
def extract_video_id(col_index, first_col=45, block_width=None):
    """Return the 1-based number of the video block that contains a column.

    Video blocks are laid out back to back, each ``block_width`` columns wide,
    starting at position ``first_col``.

    Parameters
    ----------
    col_index : int
        Positional index of a column in the survey DataFrame.
    first_col : int, optional
        Position of the first column of the first video block (default 45).
    block_width : int, optional
        Number of columns per video block. When omitted, the notebook-level
        ``cols_per_video`` is used.

    Returns
    -------
    int
        1 for any column of the first block, 2 for the second, and so on.
        Columns located before ``first_col`` yield a value smaller than 1.
    """
    if block_width is None:
        # Fall back to the notebook-wide setting so existing one-argument calls are unchanged
        block_width = cols_per_video
    return (col_index - first_col) // block_width + 1

In [6]:
# Accumulator for the reshaped records (one dict per participant/video pair)
processed_rows = list()

In [7]:
# Reshape the wide survey export into one record per (participant, video) pair.

# Positional layout of the export. These are the same literals the loop has always
# used, now named in one place.
first_video_col = 45      # position of the first column of the first video block
last_video_col = 8237     # exclusive upper bound of the scanned video blocks
leading_cols_skipped = 4  # columns at the start of each block that are not ratings -- TODO confirm what they hold

# Start from an empty list so that re-running this cell cannot append duplicate records.
processed_rows = []

# zip() below would silently drop values if the two widths ever diverged.
assert len(ratings_cols) == cols_per_video - leading_cols_skipped, (
    "ratings_cols must have one name per rating column of a video block"
)

# The first two rows of df are skipped (presumably extra header rows of the export -- confirm).
for _, row in df.iloc[2:].iterrows():
    # Blocks are visited in ascending column order, so records come out sorted by video_id.
    for start_col in range(first_video_col, last_video_col, cols_per_video):
        block = row.iloc[start_col:start_col + cols_per_video]
        # Keep only the blocks in which this participant has at least one non-null value.
        if not block.notna().any():
            continue
        # Rating values are the block's columns after the skipped leading ones.
        ratings_vals = block.iloc[leading_cols_skipped:]
        processed_row = {
            'participant_id': row.PROLIFIC_PID,
            'video_id': extract_video_id(start_col),
        }
        # Pair each rating value with its output column name, in order.
        processed_row.update(zip(ratings_cols, ratings_vals))
        processed_rows.append(processed_row)

In [8]:
# Assemble all collected records into a single table (one row per record)
processed_df = pd.DataFrame(data=processed_rows)

In [9]:
# Convert the five verbal scale labels to the numeric codes 1-5.
# The replacement is limited to the rating-scale columns: applied to the whole frame it
# would also rewrite an 'other_text' or 'appraisal' answer that is exactly "Low", "High", etc.
scale_labels = ['Very Low', 'Low', 'Average', 'High', 'Very High']
scale_scores = [1, 2, 3, 4, 5]
scale_cols = [col for col in ratings_cols if col not in ('other_text', 'appraisal')]
processed_df[scale_cols] = processed_df[scale_cols].replace(scale_labels, scale_scores)

In [10]:
# Rating columns to hold as floats (every rating column except 'other_text' and 'appraisal')
num_cols = [
    'joy', 'sadness', 'fear', 'anger', 'disgust', 'surprise', 'other',
    'pleasure', 'arousal', 'dominance',
]
# Cast them all in a single astype call, using a column -> dtype mapping
processed_df = processed_df.astype({col: float for col in num_cols})

In [11]:
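# Disabled step, kept for reference: it would drop 'other' from num_cols and then
# fill the missing values of the remaining numeric columns with 0.
# num_cols.remove('other')
# processed_df[num_cols] = processed_df[num_cols].fillna(value=0)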

In [12]:
# Show the dtype pandas holds for each column of processed_df
processed_df.dtypes

participant_id     object
video_id            int64
joy               float64
sadness           float64
fear              float64
anger             float64
disgust           float64
surprise          float64
other             float64
other_text         object
pleasure          float64
arousal           float64
dominance         float64
appraisal          object
dtype: object

In [13]:
# Display processed_df (the rendered output below elides the middle rows)
processed_df

Unnamed: 0,participant_id,video_id,joy,sadness,fear,anger,disgust,surprise,other,other_text,pleasure,arousal,dominance,appraisal
0,5c791e24266f210012af518d,1,,1.0,1.0,,,2.0,,,4.0,3.0,3.0,i honestly dont know about this one
1,5c791e24266f210012af518d,6,,4.0,,,,,,,1.0,1.0,2.0,it seemed like something had made the robot sa...
2,5c791e24266f210012af518d,59,,5.0,1.0,,,,,,1.0,1.0,1.0,it seems to be sad and maybe a little confused...
3,5c791e24266f210012af518d,61,,1.0,,1.0,1.0,,,,4.0,3.0,3.0,it seemed to be kinda all over the place
4,5c791e24266f210012af518d,219,,,4.0,,,1.0,,,2.0,7.0,4.0,"it seems like something made the robot scared,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,60089c477baa6b3fba6dbe80,267,1.0,1.0,3.0,3.0,,3.0,,,3.0,3.0,5.0,The robot was a bit more irritable and fearful...
96,60089c477baa6b3fba6dbe80,322,1.0,1.0,4.0,1.0,1.0,3.0,,,1.0,1.0,1.0,The robot looked depressed and was just drivin...
97,60089c477baa6b3fba6dbe80,334,3.0,1.0,1.0,1.0,,3.0,4.0,Happy,7.0,7.0,5.0,The robot seemed confident but whimsical and w...
98,60089c477baa6b3fba6dbe80,383,3.0,1.0,1.0,1.0,1.0,1.0,4.0,Confident,7.0,7.0,6.0,The robot seemed calm and in a good mood.


In [14]:
# Write the reshaped ratings table to disk, leaving out the DataFrame index column
output_path = '../Data/Processed/rating.csv'
processed_df.to_csv(output_path, index=False)