In [17]:
import pandas as pd

import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides pandas FutureWarning /
# DeprecationWarning messages; consider narrowing it to a specific category.
warnings.filterwarnings('ignore')

In [2]:
# Apple Watch CSVs, one per genre (comedy, documentary, horror)
comedy_aw, documentary_aw, horror_aw = map(pd.read_csv, (
    'generated_dfs/comedy_aw.csv',
    'generated_dfs/documentary_aw.csv',
    'generated_dfs/horror_aw.csv',
))

# Tracked-motion CSVs, same genre order as above
comedy_tm, documentary_tm, horror_tm = map(pd.read_csv, (
    'generated_dfs/tracked_motion_comedy.csv',
    'generated_dfs/tracked_motion_documentary.csv',
    'generated_dfs/tracked_motion_horror.csv',
))

In [15]:
def merge_dataframes(aw_df, tm_df):
    """Attach the nearest-in-time Apple Watch row to every tracked-motion row.

    Parameters
    ----------
    aw_df : pandas.DataFrame
        Apple Watch samples; must contain a ``'time'`` column that
        ``pd.to_datetime`` can parse.
    tm_df : pandas.DataFrame
        Tracked-motion samples; must contain a ``'timestamp'`` column that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``tm_df`` (sorted by ``'timestamp'``), carrying the
        columns of the ``aw_df`` row whose ``'time'`` is closest.

    Notes
    -----
    The inputs are left untouched: the datetime conversion is done on copies,
    so the function can be re-run on the same frames without side effects.
    """
    # Parse to datetime and drop any timezone so both keys are tz-naive and
    # therefore comparable; `assign` returns a new frame instead of writing
    # into the caller's data.
    watch = aw_df.assign(
        time=pd.to_datetime(aw_df['time']).dt.tz_localize(None)
    ).sort_values('time')
    motion = tm_df.assign(
        timestamp=pd.to_datetime(tm_df['timestamp']).dt.tz_localize(None)
    ).sort_values('timestamp')

    # merge_asof requires both sides sorted on their key (done above).
    return pd.merge_asof(
        motion,
        watch,
        left_on='timestamp',
        right_on='time',
        direction='nearest',
    )

def aggregate_data(df):
    """Average all columns of ``df`` within one-second bins of ``'timestamp'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'timestamp'`` column that ``pd.to_datetime`` can
        parse. Every other column is passed to ``GroupBy.mean``.
        NOTE(review): non-numeric columns may make ``mean`` raise on recent
        pandas versions - confirm the merged frames contain none.

    Returns
    -------
    pandas.DataFrame
        One row per distinct second, with the bin start in a leading
        ``'time_rounded'`` column followed by the per-bin column means.

    Notes
    -----
    The input frame is not modified; the helper columns are added to a copy.
    """
    # `assign` copies, so the caller's frame keeps its original columns/dtypes.
    binned = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Floor each timestamp to the start of its second. The lowercase 's'
    # alias is used because the uppercase 'S' alias is deprecated in pandas.
    binned['time_rounded'] = binned['timestamp'].dt.floor('1s')

    # Collapse each one-second bin to its mean and restore a flat index.
    return binned.groupby('time_rounded').mean().reset_index()

In [18]:
# Merge watch + motion data for each genre, then average per second.
comedy, documentary, horror = (
    aggregate_data(merge_dataframes(watch_df, motion_df))
    for watch_df, motion_df in (
        (comedy_aw, comedy_tm),
        (documentary_aw, documentary_tm),
        (horror_aw, horror_tm),
    )
)

In [20]:
# Display the heart_rate column of the aggregated comedy frame.
comedy['heart_rate']

0       62.0
1       62.0
2       62.0
3       62.0
4       62.0
        ... 
1882    63.0
1883    63.0
1884    63.0
1885    63.0
1886    63.0
Name: heart_rate, Length: 1887, dtype: float64

In [21]:
# Tag every aggregated frame with the genre it was recorded for
comedy = comedy.assign(genre='comedy')
documentary = documentary.assign(genre='documentary')
horror = horror.assign(genre='horror')

# Stack the three genres into a single frame with a fresh 0..n-1 index
df_all = pd.concat((comedy, documentary, horror), ignore_index=True)

In [23]:
# Write the combined frame to all_genres.csv (path is relative to the current
# working directory); index=False leaves the row index out of the file.
df_all.to_csv('all_genres.csv', index=False)