In [None]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
# Outlier removal: compute z-scores for the x and y columns (the first two columns)
# of a track DataFrame and return a copy without the rows flagged as outliers.

from scipy.stats import zscore
def clean_outliers(df, threshold=3):
    """Drop rows whose x or y value is a z-score outlier.

    The first two columns of ``df`` are treated as the x and y coordinates.
    A row is removed when the absolute z-score of either coordinate exceeds
    ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first two columns are numeric coordinates.
    threshold : float, default 3
        Maximum absolute z-score a value may have and still be kept.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that are not outliers in either column.
    """
    if len(df) == 0:
        return df.copy()

    keep = np.ones(len(df), dtype=bool)
    for position in (0, 1):
        values = df.iloc[:, position].to_numpy(dtype=float)
        # nan_policy='omit': with the default ('propagate') a single NaN turns
        # every z-score of the column into NaN and no outlier is ever removed.
        z_scores = zscore(values, nan_policy='omit')
        # NaN z-scores compare False, so rows with missing values are kept.
        keep &= ~(np.abs(z_scores) > threshold)

    # Positional boolean mask: unlike an index-label lookup this stays correct
    # even if the index contains duplicate labels.
    return df[keep]

In [None]:
from moviepy.editor import VideoFileClip
video_folder = '/Users/andrei-macpro/Documents/Data/videos/play_videos' 

# Build a table with one row per video file: its name without extension and
# its duration as reported by moviepy.
duration_records = []

for video_file in sorted(os.listdir(video_folder)):
    if video_file == ".DS_Store":
        continue
    file_path = os.path.join(video_folder, video_file)
    if not os.path.isfile(file_path):
        continue
    clip = VideoFileClip(file_path)
    try:
        clip_duration = clip.duration
    finally:
        # Release the reader held by the clip; without this every video
        # stays open until the kernel is shut down.
        clip.close()
    duration_records.append({
        # Text before the first '.', later compared with tracking folder names.
        'file_name': video_file.split('.')[0],
        'duration': clip_duration,
    })

durations = pd.DataFrame(duration_records, columns=['file_name', 'duration'])

In [None]:
# Root directory of the tracking output. The processing loop lists every entry
# in it and reads each one as a folder of per-track CSV files.
tracking_folder = '/Users/andrei-macpro/Documents/Data/openpose/play/tracking/tracking' 

In [None]:
# Per-video angle features: for every track of every video, compute the
# direction of movement between consecutive rows and summarise it.
MAX_FRAME_GAP = 5  # a step is only used when the frame indices differ by less than this
ANGLE_COLUMNS = ('angle_horizontal', 'angle_vertical')
# Output-key suffix -> pandas reduction. Insertion order fixes the order of the
# resulting feature columns.
SUMMARY_STATS = {
    'mean': pd.Series.mean,
    'median': pd.Series.median,
    'var': pd.Series.var,
    'std': pd.Series.std,
    'min': pd.Series.min,
    'max': pd.Series.max,
    'skew': pd.Series.skew,
    'kurt': pd.Series.kurtosis,
}


def _step_angles(x, y, frames, max_gap=MAX_FRAME_GAP):
    """Return per-row movement angles in degrees for one track.

    Entry ``i`` describes the step from row ``i`` to row ``i + 1``:
    ``arctan2(dy, dx)`` for the horizontal-axis angle and ``arctan2(dx, dy)``
    for the vertical-axis angle. A step whose frame indices differ by
    ``max_gap`` or more is NaN, and so is the last row, which has no successor.

    Returns a ``(horizontal, vertical)`` pair of float arrays of ``len(x)``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    frames = np.asarray(frames)

    horizontal = np.full(len(x), np.nan)
    vertical = np.full(len(x), np.nan)
    if len(x) < 2:
        return horizontal, vertical

    dx = np.diff(x)
    dy = np.diff(y)
    contiguous = np.diff(frames) < max_gap
    # Steps across a gap must be NaN rather than reuse an earlier dx/dy.
    horizontal[:-1] = np.where(contiguous, np.degrees(np.arctan2(dy, dx)), np.nan)
    vertical[:-1] = np.where(contiguous, np.degrees(np.arctan2(dx, dy)), np.nan)
    return horizontal, vertical


stats = {}
scaler = StandardScaler()
for folder_name in sorted(os.listdir(tracking_folder)):
    file_path = os.path.join(tracking_folder, folder_name)
    # Skip Finder metadata and stray files: only directories are listed below.
    if folder_name == ".DS_Store" or not os.path.isdir(file_path):
        continue

    # Load every track CSV of this video: first column is the index, the two
    # remaining columns are renamed to x_<track> / y_<track>.
    tracks = {}
    for file in sorted(os.listdir(file_path)):
        if file == ".DS_Store":
            continue
        track_name = file.split('.')[0]
        df = pd.read_csv(os.path.join(file_path, file), index_col=0)
        df = df[~df.index.duplicated(keep='first')]
        df.columns = ['x_' + track_name, 'y_' + track_name]
        tracks[track_name] = clean_outliers(df)

    print(folder_name)
    if not tracks:
        # pd.concat raises on an empty list; nothing to compute for this folder.
        continue

    # The duration is the same for every track of the video, so look it up once.
    matching_durations = durations.loc[durations['file_name'] == folder_name, 'duration']
    if matching_durations.empty:
        raise KeyError(f"no video duration found for tracking folder '{folder_name}'")
    duration = matching_durations.iloc[0]

    # NOTE(review): concat(axis=1) aligns the tracks on the union of their
    # indices, so each track also gets NaN rows for frames that only exist in
    # other tracks; steps touching those rows produce NaN angles. Confirm this
    # is intended.
    combined_data = pd.concat(list(tracks.values()), axis=1)
    if combined_data.empty:
        continue
    combined_scaled = scaler.fit_transform(combined_data)
    # Convert the scaled data back to a DataFrame
    combined_scaled_df = pd.DataFrame(combined_scaled, index=combined_data.index, columns=combined_data.columns)

    folder_stats = stats.setdefault(folder_name, {})
    for track_name in tracks:
        x_col = 'x_' + track_name
        y_col = 'y_' + track_name
        # Exact column names (a prefix match would also select x_10 for track
        # "1"), and a copy so adding columns does not write into a slice.
        track_df = combined_scaled_df[[x_col, y_col]].copy()

        angles_horizontal, angles_vertical = _step_angles(
            track_df[x_col], track_df[y_col], track_df.index
        )
        track_df['angle_horizontal'] = angles_horizontal
        track_df['angle_vertical'] = angles_vertical
        tracks[track_name] = track_df

        # Every statistic is normalised by the video duration and scaled by 100.
        folder_stats[track_name] = {
            f'{column}_{suffix}': reduce_fn(track_df[column]) / duration * 100
            for column in ANGLE_COLUMNS
            for suffix, reduce_fn in SUMMARY_STATS.items()
        }
    

In [None]:
# Flatten the nested {video: {track: features}} dictionary into one entry per
# (video, track) pair, then build a DataFrame with one row per pair.
flat_stats = {}
for video_id, per_track in stats.items():
    for track, features in per_track.items():
        flat_stats[(video_id, track)] = features

df = pd.DataFrame.from_dict(flat_stats, orient='index')

# Name the two index levels
df.index.names = ['s_id', 'track_name']

In [None]:
# Pivot the per-(video, track) statistics into one wide row per video, with
# every feature column prefixed by its track name.

# Move 'track_name' from the index into a column. Guarded and rebinding rather
# than in place, so running this cell a second time does not fail once the
# index level is already gone.
if 'track_name' in df.index.names:
    df = df.reset_index(level='track_name')

# Start from an empty frame indexed by the unique video ids
new_df = pd.DataFrame(index=df.index.unique())

for track_name in df['track_name'].unique():
    track_features = (
        df.loc[df['track_name'] == track_name]
        .drop(columns=['track_name'])
        .add_prefix(f'{track_name}_')
    )
    # Left join on the video id: videos without this track get NaN features.
    new_df = new_df.join(track_features)

new_df

In [None]:
# Save the wide per-video feature table as CSV.
output_path = '/Users/andrei-macpro/Documents/Data/tracking/features/play/angles_each.csv'
# Create the target directory first so a missing folder does not abort the
# save after all features have been computed.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
new_df.to_csv(output_path)