In [100]:
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [101]:
# Apple Watch exports, one CSV per genre
comedy_aw, documentary_aw, horror_aw = map(pd.read_csv, (
    'generated_dfs/comedy_aw.csv',
    'generated_dfs/documentary_aw.csv',
    'generated_dfs/horror_aw.csv',
))

# Tracked-motion exports, one CSV per genre
comedy_tm, documentary_tm, horror_tm = map(pd.read_csv, (
    'generated_dfs/tracked_motion_comedy.csv',
    'generated_dfs/tracked_motion_documentary.csv',
    'generated_dfs/tracked_motion_horror.csv',
))

# Pupil-size exports, one CSV per genre
comedy_ps, documentary_ps, horror_ps = map(pd.read_csv, (
    'generated_dfs/pupil_size_comedy.csv',
    'generated_dfs/pupil_size_documentary.csv',
    'generated_dfs/pupil_size_horror.csv',
))

In [102]:
# Drop the unused 'time' column from the comedy pupil-size frame.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run after the column has already been removed.
comedy_ps = comedy_ps.drop(columns=['time'], errors='ignore')

In [103]:
def merge_dataframes(aw_df, tm_df, ps_df):
    """Join Apple Watch and pupil-size rows onto the tracked-motion rows by time.

    Each tracked-motion row is paired with the Apple Watch row and then the
    pupil-size row whose timestamp is nearest to it (``merge_asof`` with
    ``direction='nearest'``), so the result has one row per tracked-motion row.

    Parameters
    ----------
    aw_df : pandas.DataFrame
        Apple Watch data; its ``'time'`` column is parsed into ``'timestamp'``
        and then removed.
    tm_df : pandas.DataFrame
        Tracked-motion data with a ``'timestamp'`` column; its
        ``'time_elapsed_seconds'`` column is removed.
    ps_df : pandas.DataFrame
        Pupil-size data with a ``'timestamp'`` column.

    Returns
    -------
    pandas.DataFrame
        Tracked-motion columns followed by the Apple Watch and pupil-size
        columns, keyed on a timezone-naive ``'timestamp'``.

    Raises
    ------
    KeyError
        If one of the columns named above is missing from its frame.

    Notes
    -----
    The input frames are not modified: every step below produces a new frame,
    so the function can be called repeatedly on the same inputs.
    """
    # Parse to datetime and strip any timezone so all three keys are
    # comparable; `assign`/`drop` return new frames, leaving the arguments as-is.
    aw = (
        aw_df
        .assign(timestamp=pd.to_datetime(aw_df['time']).dt.tz_localize(None))
        .drop(columns=['time'])
        .sort_values('timestamp')  # merge_asof requires sorted keys
    )
    tm = (
        tm_df
        .assign(timestamp=pd.to_datetime(tm_df['timestamp']).dt.tz_localize(None))
        .drop(columns=['time_elapsed_seconds'])
        .sort_values('timestamp')
    )
    ps = (
        ps_df
        .assign(timestamp=pd.to_datetime(ps_df['timestamp']).dt.tz_localize(None))
        .sort_values('timestamp')
    )

    # Tracked motion is the left side of both joins, so it sets the row count.
    merged = pd.merge_asof(tm, aw, left_on='timestamp', right_on='timestamp', direction='nearest')
    return pd.merge_asof(merged, ps, left_on='timestamp', right_on='timestamp', direction='nearest')
    

def aggregate_data(df):
    """Average the rows of ``df`` within each one-second bucket.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'timestamp'`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        One row per distinct second, with ``'time_rounded'`` (the timestamp
        floored to the second) as the first column followed by the per-second
        mean of every numeric, boolean and datetime column.

    Notes
    -----
    ``df`` itself is not modified. Columns that cannot be averaged (for
    example a text label column) are left out of the result instead of making
    the mean fail.
    """
    # `assign` returns a new frame, so the caller's DataFrame keeps its
    # original columns and dtypes.
    frame = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Floor timestamps to the whole second; the lowercase 's' alias is used
    # because the uppercase 'S' spelling is deprecated in recent pandas.
    frame = frame.assign(time_rounded=frame['timestamp'].dt.floor('1s'))

    # Keep only columns a mean is defined for (is_numeric_dtype covers bool).
    averageable = [
        col for col in frame.columns
        if pd.api.types.is_numeric_dtype(frame[col])
        or pd.api.types.is_datetime64_any_dtype(frame[col])
    ]

    # Group by the floored time and average
    return frame[averageable].groupby('time_rounded').mean().reset_index()

In [104]:
# Build one time-aligned frame per genre from its three sensor exports
comedy, documentary, horror = (
    merge_dataframes(*sources)
    for sources in (
        (comedy_aw, comedy_tm, comedy_ps),
        (documentary_aw, documentary_tm, documentary_ps),
        (horror_aw, horror_tm, horror_ps),
    )
)

In [105]:
# Tag every row with the genre its frame came from
for label, frame in (
    ('comedy', comedy),
    ('documentary', documentary),
    ('horror', horror),
):
    frame['genre'] = label

# Stack the three genre frames into one, renumbering the index
df_all = pd.concat([comedy, documentary, horror], ignore_index=True)

In [106]:
# Show the combined comedy + documentary + horror frame as this cell's output
df_all

Unnamed: 0,timestamp,x_coordinate,y_coordinate,active_energy_burned,basal_energy_burned,env_audio_exposure,heart_rate,physical_effort,success,pupil_diameter_mm,...,pupil_detected,iris_detected,pupil_center_x,pupil_center_y,iris_center_x,iris_center_y,gaze_magnitude,concentricity_score,pupil_circularity,genre
0,2025-06-05 16:25:43.743,402,190,,,,62.0,,True,0.0,...,True,False,136,84,0,0,0.0,0.0,0.450210,comedy
1,2025-06-05 16:25:43.771,402,190,,,,62.0,,True,0.0,...,True,False,243,22,0,0,0.0,0.0,0.399006,comedy
2,2025-06-05 16:25:43.823,402,190,,,,62.0,,True,0.0,...,True,False,243,22,0,0,0.0,0.0,0.399006,comedy
3,2025-06-05 16:25:43.852,402,190,,,,62.0,,True,0.0,...,True,False,243,22,0,0,0.0,0.0,0.399006,comedy
4,2025-06-05 16:25:43.869,402,190,,,,62.0,,True,0.0,...,True,False,243,22,0,0,0.0,0.0,0.399006,comedy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170446,2025-06-05 18:30:30.536,722,480,,,,54.0,,True,0.0,...,True,False,41,37,0,0,0.0,0.0,0.681159,horror
170447,2025-06-05 18:30:30.567,722,480,,,,54.0,,True,0.0,...,True,False,41,37,0,0,0.0,0.0,0.681159,horror
170448,2025-06-05 18:30:30.597,722,480,,,,54.0,,True,0.0,...,True,False,41,37,0,0,0.0,0.0,0.681159,horror
170449,2025-06-05 18:30:30.627,722,480,,,,54.0,,True,0.0,...,True,False,41,37,0,0,0.0,0.0,0.681159,horror


In [108]:
# Write the combined frame to disk, leaving the row index out of the file
df_all.to_csv(path_or_buf='all_genres.csv', index=False)

In [107]:
# Number of rows in the combined frame
df_all.shape[0]

170451