In [27]:
import pandas as pd

import warnings
# NOTE(review): calling filterwarnings('ignore') with no category or module
# filter silences every warning for the rest of the session, including
# deprecation notices from pandas. Consider narrowing it once the notebook
# is stable.
warnings.filterwarnings('ignore')

In [28]:
# Apple Watch exports, one CSV per genre
comedy_aw, documentary_aw, horror_aw = map(pd.read_csv, (
    'generated_dfs/comedy_aw.csv',
    'generated_dfs/documentary_aw.csv',
    'generated_dfs/horror_aw.csv',
))

# Tracked-motion exports, one CSV per genre
comedy_tm, documentary_tm, horror_tm = map(pd.read_csv, (
    'generated_dfs/tracked_motion_comedy.csv',
    'generated_dfs/tracked_motion_documentary.csv',
    'generated_dfs/tracked_motion_horror.csv',
))

# Pupil-size exports, one CSV per genre
comedy_ps, documentary_ps, horror_ps = map(pd.read_csv, (
    'generated_dfs/pupil_size_comedy.csv',
    'generated_dfs/pupil_size_documentary.csv',
    'generated_dfs/pupil_size_horror.csv',
))

In [29]:
def merge_dataframes(aw_df, tm_df, ps_df):
    """Align three time-stamped frames onto the rows of ``tm_df``.

    Each row of ``tm_df`` is paired with the row of ``aw_df`` whose ``time``
    is closest to its ``timestamp``, and then with the row of ``ps_df`` whose
    ``timestamp`` is closest.

    Parameters
    ----------
    aw_df : pandas.DataFrame
        Must contain a ``time`` column parseable by ``pd.to_datetime``.
    tm_df : pandas.DataFrame
        Must contain a ``timestamp`` column parseable by ``pd.to_datetime``.
        Its rows define the rows of the result.
    ps_df : pandas.DataFrame
        Must contain a ``timestamp`` column parseable by ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        One row per ``tm_df`` row, ordered by ``timestamp``. The ``time``
        column of ``aw_df`` is kept alongside ``timestamp``.

    Notes
    -----
    The input frames are left untouched: datetime conversion and sorting are
    done on copies, so re-running a calling cell always starts from the same
    raw data.
    """
    def _naive_sorted(frame, column):
        # Parse to datetime, drop any timezone so all keys are comparable,
        # and sort by the key (merge_asof requires sorted keys).
        parsed = pd.to_datetime(frame[column]).dt.tz_localize(None)
        return frame.assign(**{column: parsed}).sort_values(column)

    aw = _naive_sorted(aw_df, 'time')
    tm = _naive_sorted(tm_df, 'timestamp')
    ps = _naive_sorted(ps_df, 'timestamp')

    # Nearest-in-time match in either direction (earlier or later)
    merged = pd.merge_asof(tm, aw, left_on='timestamp', right_on='time', direction='nearest')
    return pd.merge_asof(merged, ps, on='timestamp', direction='nearest')
    

def aggregate_data(df):
    """Average the columns of ``df`` within one-second bins of ``timestamp``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``timestamp`` column parseable by ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct second, with the bin start in ``time_rounded``
        and the per-bin mean of every other column.

    Notes
    -----
    ``df`` itself is not modified; the conversion and the helper
    ``time_rounded`` column live on a copy.

    NOTE(review): ``.mean()`` is applied to every remaining column, so a
    non-numeric column (e.g. a string label) is expected to make recent
    pandas versions raise. Drop such columns before calling; confirm against
    the installed pandas version.
    """
    timestamps = pd.to_datetime(df['timestamp'])

    binned = df.assign(
        timestamp=timestamps,
        # Floor each timestamp to the start of its second. The lowercase 's'
        # alias is used because the uppercase 'S' alias is deprecated.
        time_rounded=timestamps.dt.floor('s'),
    )

    # Group by the one-second bin and average
    return binned.groupby('time_rounded').mean().reset_index()

In [33]:
# One merged frame per genre, built from that genre's *_aw, *_tm and *_ps frames
comedy, documentary, horror = (
    merge_dataframes(aw, tm, ps)
    for aw, tm, ps in (
        (comedy_aw, comedy_tm, comedy_ps),
        (documentary_aw, documentary_tm, documentary_ps),
        (horror_aw, horror_tm, horror_ps),
    )
)

In [34]:
# Tag every row with the genre of the frame it came from
comedy['genre'], documentary['genre'], horror['genre'] = (
    'comedy',
    'documentary',
    'horror',
)

# Stack the three genre frames into one, with a fresh 0..n-1 index
df_all = pd.concat((comedy, documentary, horror), ignore_index=True)

In [35]:
# Write the combined frame to disk without the row index
df_all.to_csv(path_or_buf='all_genres.csv', index=False)