In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the three raw tracked-motion recordings (comedy, documentary, horror),
# one CSV per recording, in that order.
comedy, documentary, horror = (
    pd.read_csv(csv_path)
    for csv_path in (
        'source_data/kenji/tracked_motion_comedy.csv',
        'source_data/kenji/tracked_motion_documentary.csv',
        'source_data/kenji/tracked_motion_horror.csv',
    )
)

In [3]:
# Preview the comedy recording exactly as loaded from CSV (bare last
# expression -> rich DataFrame display).
comedy

Unnamed: 0,timestamp,time_elapsed_seconds,x_coordinate,y_coordinate
0,2025-06-05 16:25:43.743,0.032,402,190
1,2025-06-05 16:25:43.771,0.060,402,190
2,2025-06-05 16:25:43.823,0.112,402,190
3,2025-06-05 16:25:43.852,0.141,402,190
4,2025-06-05 16:25:43.869,0.158,402,190
...,...,...,...,...
56561,2025-06-05 16:57:09.212,1885.501,438,-40
56562,2025-06-05 16:57:09.241,1885.530,438,-40
56563,2025-06-05 16:57:09.277,1885.566,438,-40
56564,2025-06-05 16:57:09.307,1885.596,438,-40


In [4]:
# Remove the 'time_elapsed_seconds' column from every recording; the later
# per-second aggregation buckets on the 'timestamp' column instead.
#
# Each name is rebound to a new frame rather than mutated with inplace=True,
# and errors='ignore' makes the drop a no-op when the column is already gone,
# so this cell can be re-run without raising KeyError.
comedy = comedy.drop(columns='time_elapsed_seconds', errors='ignore')
documentary = documentary.drop(columns='time_elapsed_seconds', errors='ignore')
horror = horror.drop(columns='time_elapsed_seconds', errors='ignore')

In [7]:
# Check the horror recording after 'time_elapsed_seconds' has been dropped.
horror

Unnamed: 0,timestamp,x_coordinate,y_coordinate
0,2025-06-05 17:58:47.491,354,237
1,2025-06-05 17:58:47.558,354,237
2,2025-06-05 17:58:47.586,354,237
3,2025-06-05 17:58:47.616,354,236
4,2025-06-05 17:58:47.653,354,236
...,...,...,...
57090,2025-06-05 18:30:30.536,722,480
57091,2025-06-05 18:30:30.567,722,480
57092,2025-06-05 18:30:30.597,722,480
57093,2025-06-05 18:30:30.627,722,480


In [8]:
def aggregate_data(df):
    """Average the samples of a recording into one row per whole second.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``timestamp`` column that ``pd.to_datetime`` can parse.
        Every other column is averaged with ``groupby(...).mean()``, so it has
        to be of a dtype that supports ``mean``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct second present in the input. The first column,
        ``time_rounded``, is the second each sample was floored to; the
        remaining columns are the per-second means of the input columns,
        including ``timestamp`` itself (the mean sample time in that second).

    Raises
    ------
    KeyError
        If ``df`` has no ``timestamp`` column.

    Notes
    -----
    The caller's frame is not modified: the parsed timestamps and the bucket
    column are attached to a new frame via ``assign``.
    """
    # Parse to datetimes and strip any timezone so values are tz-naive.
    parsed = pd.to_datetime(df['timestamp']).dt.tz_localize(None)

    per_sample = df.assign(
        timestamp=parsed,
        # Floor (truncate) to the start of the second -- this is not rounding
        # to the nearest second: 16:25:43.999 stays in the 16:25:43 bucket.
        time_rounded=parsed.dt.floor('1s'),
    )

    # Group by the one-second bucket and average every remaining column.
    return per_sample.groupby('time_rounded').mean().reset_index()

# Collapse each recording to one averaged row per second, rebinding the
# original names to the aggregated frames (processed in the listed order).
comedy, documentary, horror = [
    aggregate_data(recording)
    for recording in (comedy, documentary, horror)
]

In [9]:
# Inspect the per-second comedy aggregate: 'time_rounded' is the one-second
# bucket, 'timestamp' is the mean sample time inside that bucket.
comedy

Unnamed: 0,time_rounded,timestamp,x_coordinate,y_coordinate
0,2025-06-05 16:25:43,2025-06-05 16:25:43.870222336,402.111111,190.222222
1,2025-06-05 16:25:44,2025-06-05 16:25:44.504862208,411.689655,193.379310
2,2025-06-05 16:25:45,2025-06-05 16:25:45.499677440,395.870968,189.870968
3,2025-06-05 16:25:46,2025-06-05 16:25:46.511133440,399.233333,191.000000
4,2025-06-05 16:25:47,2025-06-05 16:25:47.500861952,398.827586,191.034483
...,...,...,...,...
1882,2025-06-05 16:57:05,2025-06-05 16:57:05.490633216,438.000000,-40.000000
1883,2025-06-05 16:57:06,2025-06-05 16:57:06.498548480,438.000000,-40.000000
1884,2025-06-05 16:57:07,2025-06-05 16:57:07.511800064,438.000000,-40.000000
1885,2025-06-05 16:57:08,2025-06-05 16:57:08.503344896,438.000000,-40.000000


In [10]:
# Replace the averaged 'timestamp' with the whole-second bucket label, keeping
# the column name 'timestamp'. The swapped-in column ends up last.
for motion_df in (comedy, documentary, horror):
    # Guard for re-runs: once 'time_rounded' is gone the swap has already been
    # done. Without this check a second run would delete the good 'timestamp'
    # column and then fail with KeyError on 'time_rounded'.
    if 'time_rounded' not in motion_df.columns:
        continue

    # Discard the mean-of-samples timestamp produced by the aggregation.
    motion_df.drop(columns='timestamp', inplace=True)

    # pop() removes 'time_rounded' and returns its values; assigning them
    # re-creates 'timestamp' as the last column.
    motion_df['timestamp'] = motion_df.pop('time_rounded')

In [11]:
# Confirm 'timestamp' now holds whole-second values and 'time_rounded' is gone.
comedy

Unnamed: 0,x_coordinate,y_coordinate,timestamp
0,402.111111,190.222222,2025-06-05 16:25:43
1,411.689655,193.379310,2025-06-05 16:25:44
2,395.870968,189.870968,2025-06-05 16:25:45
3,399.233333,191.000000,2025-06-05 16:25:46
4,398.827586,191.034483,2025-06-05 16:25:47
...,...,...,...
1882,438.000000,-40.000000,2025-06-05 16:57:05
1883,438.000000,-40.000000,2025-06-05 16:57:06
1884,438.000000,-40.000000,2025-06-05 16:57:07
1885,438.000000,-40.000000,2025-06-05 16:57:08


In [12]:
# Put 'timestamp' in the first position; all other columns keep their
# existing relative order.
comedy = comedy.loc[
    :, ['timestamp', *(name for name in comedy.columns if name != 'timestamp')]
]
documentary = documentary.loc[
    :, ['timestamp', *(name for name in documentary.columns if name != 'timestamp')]
]
horror = horror.loc[
    :, ['timestamp', *(name for name in horror.columns if name != 'timestamp')]
]

In [13]:
# Final comedy frame: 'timestamp' first, followed by the averaged coordinates.
comedy

Unnamed: 0,timestamp,x_coordinate,y_coordinate
0,2025-06-05 16:25:43,402.111111,190.222222
1,2025-06-05 16:25:44,411.689655,193.379310
2,2025-06-05 16:25:45,395.870968,189.870968
3,2025-06-05 16:25:46,399.233333,191.000000
4,2025-06-05 16:25:47,398.827586,191.034483
...,...,...,...
1882,2025-06-05 16:57:05,438.000000,-40.000000
1883,2025-06-05 16:57:06,438.000000,-40.000000
1884,2025-06-05 16:57:07,438.000000,-40.000000
1885,2025-06-05 16:57:08,438.000000,-40.000000


In [14]:
# Final documentary frame, same layout as the comedy one.
documentary

Unnamed: 0,timestamp,x_coordinate,y_coordinate
0,2025-06-05 17:13:12,345.000000,229.000000
1,2025-06-05 17:13:13,378.586207,223.862069
2,2025-06-05 17:13:14,342.733333,225.800000
3,2025-06-05 17:13:15,358.290323,227.096774
4,2025-06-05 17:13:16,354.000000,225.466667
...,...,...,...
1889,2025-06-05 17:44:41,367.566667,271.933333
1890,2025-06-05 17:44:42,368.387097,272.354839
1891,2025-06-05 17:44:43,368.413793,273.000000
1892,2025-06-05 17:44:44,368.000000,273.000000


In [15]:
# Final horror frame, same layout as the comedy one.
horror

Unnamed: 0,timestamp,x_coordinate,y_coordinate
0,2025-06-05 17:58:47,354.812500,234.750000
1,2025-06-05 17:58:48,392.793103,239.206897
2,2025-06-05 17:58:49,374.433333,237.000000
3,2025-06-05 17:58:50,374.645161,240.709677
4,2025-06-05 17:58:51,374.000000,242.034483
...,...,...,...
1899,2025-06-05 18:30:26,722.000000,480.000000
1900,2025-06-05 18:30:27,722.000000,480.000000
1901,2025-06-05 18:30:28,722.000000,480.000000
1902,2025-06-05 18:30:29,722.000000,480.000000


In [16]:
# Write each per-second frame to its CSV; index=False keeps the positional
# row index out of the file.
for motion_df, out_path in zip(
    (comedy, documentary, horror),
    (
        '../data_collection/generated_data/comedy_tm.csv',
        '../data_collection/generated_data/documentary_tm.csv',
        '../data_collection/generated_data/horror_tm.csv',
    ),
):
    motion_df.to_csv(out_path, index=False)