# Data Preprocessing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

In [None]:
# Load the labels-and-coordinates CSV into a DataFrame. The path is relative,
# so it is resolved against the notebook's current working directory.
df = pd.read_csv('../../data/dataframes/labels_and_coordinates.csv')

In [None]:
# Show the first rows as a quick sanity check of the loaded table.
df.head()

## 1 - Data preprocessing steps

### 1.1 - Dropping rows with NaN values

In [None]:
# Number of frames (rows) before any preprocessing.
print(f"\nTotal number of frames before dropping NaN values: {len(df)}")

# Remove every row (frame) that contains a NaN in at least one column.
# Columns themselves are never dropped; the DataFrame is modified in place.
df.dropna(axis=0, how='any', inplace=True)

# Number of frames (rows) that survive the NaN filter.
print(f"\nTotal number of frames after dropping NaN values: {len(df)}")

### 1.2 - Calculate center of mass (COM) and knee/elbow angles
Goal: add 'com_x', 'com_y', 'left_elbow_angle', 'right_elbow_angle', 'left_knee_angle', 'right_knee_angle' to every frame

In [None]:
# center of mass calculation
def calculate_com(df):
    """Add weighted centre-of-mass columns ``com_x`` and ``com_y`` to ``df``.

    Each coordinate is a fixed-weight linear combination of thirteen landmark
    coordinates; the weights add up to 1. The hip term uses ``left_hip`` only.
    ``df`` is modified in place and also returned.
    """
    # (weight, landmark) pairs, listed in the order they are summed.
    segment_weights = (
        (0.532, 'left_hip'),
        (0.1175, 'left_knee'),
        (0.1175, 'right_knee'),
        (0.0535, 'left_ankle'),
        (0.0535, 'right_ankle'),
        (0.0133, 'left_foot'),
        (0.0133, 'right_foot'),
        (0.029, 'left_shoulder'),
        (0.029, 'right_shoulder'),
        (0.0157, 'left_elbow'),
        (0.0157, 'right_elbow'),
        (0.005, 'left_wrist'),
        (0.005, 'right_wrist'),
    )

    for axis in ('x', 'y'):
        (lead_weight, lead_landmark), *remaining = segment_weights
        # Accumulate left to right so the floating-point result does not
        # depend on how the terms are grouped.
        weighted_sum = lead_weight * df[f'{lead_landmark}_{axis}']
        for weight, landmark in remaining:
            weighted_sum = weighted_sum + weight * df[f'{landmark}_{axis}']
        df[f'com_{axis}'] = weighted_sum

    return df


# Define the angle calculation
def calculate_angle(p1, p2, p3):
    """Return the angle at ``p2`` between the rays p2->p1 and p2->p3, in degrees.

    Parameters
    ----------
    p1, p2, p3 : array-like of numbers
        Point coordinates of equal length (e.g. ``[x, y]``). ``p2`` is the
        vertex of the angle.

    Returns
    -------
    float
        Angle in the range [0, 180]. NaN when either ray has zero length
        (``p1 == p2`` or ``p3 == p2``) or when an input coordinate is NaN.
    """
    a = np.asarray(p1, dtype=float) - np.asarray(p2, dtype=float)
    b = np.asarray(p3, dtype=float) - np.asarray(p2, dtype=float)

    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # Degenerate joint: the angle is undefined. Return NaN explicitly
        # instead of relying on a 0/0 division (which also emits a warning).
        return np.nan

    # Rounding can push the ratio marginally outside [-1, 1] for (anti)parallel
    # rays, which would make arccos return NaN; clamp it back into the domain.
    cosine_angle = np.clip(np.dot(a, b) / norm_product, -1.0, 1.0)

    return np.degrees(np.arccos(cosine_angle))


# Apply the angle calculation to the DataFrame: elbow and knee angles
def add_angles_to_df(df):
    """Add elbow and knee joint-angle columns (in degrees) to ``df``.

    For every frame the angle at the middle landmark of each triple below is
    computed from the 2-D ``_x`` / ``_y`` coordinates:

    * ``left_elbow_angle``  : left_shoulder  - left_elbow  - left_wrist
    * ``right_elbow_angle`` : right_shoulder - right_elbow - right_wrist
    * ``left_knee_angle``   : left_hip  - left_knee  - left_ankle
    * ``right_knee_angle``  : right_hip - right_knee - right_ankle

    The computation is vectorised over whole columns, so it also works on an
    empty DataFrame. A frame in which two landmarks of a triple coincide
    (zero-length limb) gets NaN for that angle.

    ``df`` is modified in place and also returned.
    """
    # angle column -> (first endpoint, vertex, second endpoint)
    joints = {
        'left_elbow_angle': ('left_shoulder', 'left_elbow', 'left_wrist'),
        'right_elbow_angle': ('right_shoulder', 'right_elbow', 'right_wrist'),
        'left_knee_angle': ('left_hip', 'left_knee', 'left_ankle'),
        'right_knee_angle': ('right_hip', 'right_knee', 'right_ankle'),
    }

    for angle_col, (end_a, vertex, end_b) in joints.items():
        # Vectors from the joint (vertex) to each neighbouring landmark.
        ax = df[f'{end_a}_x'] - df[f'{vertex}_x']
        ay = df[f'{end_a}_y'] - df[f'{vertex}_y']
        bx = df[f'{end_b}_x'] - df[f'{vertex}_x']
        by = df[f'{end_b}_y'] - df[f'{vertex}_y']

        dot = ax * bx + ay * by
        norm_product = np.sqrt(ax**2 + ay**2) * np.sqrt(bx**2 + by**2)

        # 0/0 yields NaN for degenerate joints. The clip keeps rounding noise
        # from pushing the cosine outside arccos' domain for straight limbs.
        cosine = (dot / norm_product).clip(-1.0, 1.0)
        df[angle_col] = np.degrees(np.arccos(cosine))

    return df

In [None]:
# Calculate the center of mass
# First add the centre-of-mass columns, then the elbow/knee angle columns.
df = add_angles_to_df(calculate_com(df))

In [None]:
# List all column names to confirm the COM and angle columns were added.
print(df.columns.tolist())

### 1.3 - Calculating velocity and acceleration
**Calculated separately for each athlete (participant) on each boulder!**

In [None]:
# Distinct participants present in the data (one of the two grouping keys used below).
df['participant'].unique()

In [None]:
# Distinct boulders present in the data (the other grouping key used below).
df['boulder'].unique()

In [None]:
def calculate_velocity_acceleration(df):
    """Add finite-difference COM velocity and acceleration columns to ``df``.

    Rows are grouped by ``participant`` and ``boulder``; inside each group the
    columns ``com_vx``, ``com_vy`` (backward difference of ``com_x`` / ``com_y``
    over ``time(s)``) and ``com_ax``, ``com_ay`` (backward difference of the
    velocities over ``time(s)``) are computed. Differences follow the existing
    row order, so rows are expected to be in time order within each group.

    Boundary conventions:

    * the first frame of every group gets 0 for all four columns;
    * the last frame of every group with more than one frame copies the four
      values of the frame before it;
    * any value that is still undefined is set to 0. This covers the second
      frame's acceleration and frames whose time step is 0: a zero step is
      treated as "undefined" rather than producing +/-inf.

    ``df`` is modified in place and also returned.
    """
    group_keys = ['participant', 'boulder']
    kinematic_cols = ['com_vx', 'com_vy', 'com_ax', 'com_ay']

    # Time step between consecutive frames of the same group. Zero steps
    # (duplicated timestamps) are masked to NaN so the divisions below give
    # NaN, which the final fillna turns into 0, instead of infinities that
    # fillna would leave in the data.
    dt = df.groupby(group_keys)['time(s)'].diff()
    dt = dt.where(dt != 0)

    # Velocities: change in position divided by change in time.
    for position_col, velocity_col in (('com_x', 'com_vx'), ('com_y', 'com_vy')):
        df[velocity_col] = df.groupby(group_keys)[position_col].diff() / dt

    # Accelerations: change in velocity divided by change in time. A fresh
    # groupby is built here so it is guaranteed to see the new velocity columns.
    for velocity_col, acceleration_col in (('com_vx', 'com_ax'), ('com_vy', 'com_ay')):
        df[acceleration_col] = df.groupby(group_keys)[velocity_col].diff() / dt

    # Position of each row inside its group, counted from the start and the end.
    from_start = df.groupby(group_keys).cumcount()
    from_end = df.groupby(group_keys).cumcount(ascending=False)

    # First frame of each group: no previous frame, so use 0.
    is_first = (from_start == 0).to_numpy()
    df.loc[is_first, kinematic_cols] = 0

    # Last frame of each multi-frame group: reuse the previous frame's values.
    # The shift is taken after the first-frame zeroing so that a two-frame
    # group copies those zeros.
    is_last = ((from_end == 0) & (from_start > 0)).to_numpy()
    if is_last.any():
        previous = df.groupby(group_keys)[kinematic_cols].shift(1)
        df.loc[is_last, kinematic_cols] = previous.loc[is_last, kinematic_cols].to_numpy()

    # Anything still undefined becomes 0.
    df[kinematic_cols] = df[kinematic_cols].fillna(0)

    return df

In [None]:
# Add the per-group COM velocity and acceleration component columns.
df = calculate_velocity_acceleration(df)

In [None]:
# List all column names to confirm the velocity/acceleration columns were added.
print(df.columns.tolist())

In [None]:
# For each kinematic column, flag whether it still contains any NaN.
nan_check = df.loc[:, ['com_vx', 'com_vy', 'com_ax', 'com_ay']].isnull().any(axis=0)

# Header line followed by the per-column flags.
print("Columns with NaN values:", nan_check, sep="\n")

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the velocity
# and acceleration component columns -- useful for spotting outliers.
print(df[['com_vx', 'com_vy', 'com_ax', 'com_ay']].describe())

In [None]:
# smoothing the velocity and acceleration data
def smooth_data(df, columns, window_size=5, polyorder=2, group_cols=None):
    """Return a copy of ``df`` with ``columns`` smoothed by a Savitzky-Golay filter.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is not modified.
    columns : iterable of str
        Names of the numeric columns to smooth.
    window_size : int, default 5
        Filter window length in samples.
    polyorder : int, default 2
        Order of the polynomial fitted inside each window.
    group_cols : str or list of str, optional
        When given, rows are grouped by these columns and every group is
        filtered independently, so a window never mixes samples of different
        groups (e.g. ``['participant', 'boulder']``). Rows that belong to no
        group keep their original values. By default (``None``) each column is
        filtered as one continuous series.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which the requested columns hold the smoothed values.

    Notes
    -----
    A series shorter than ``window_size`` is filtered with the largest odd
    window that fits. If even that window is not larger than ``polyorder``,
    the series is returned unsmoothed instead of raising.
    """
    def _filter(values):
        # Smooth one contiguous 1-D series, shrinking the window if needed.
        n_samples = len(values)
        if n_samples >= window_size:
            return savgol_filter(values, window_length=window_size, polyorder=polyorder)
        window = n_samples if n_samples % 2 == 1 else n_samples - 1
        if window <= polyorder:
            # Too few samples to fit the polynomial: leave the data as is.
            return np.asarray(values, dtype=float)
        return savgol_filter(values, window_length=window, polyorder=polyorder)

    smoothed_df = df.copy()

    if group_cols is None:
        for col in columns:
            smoothed_df[col] = _filter(df[col].to_numpy())
        return smoothed_df

    # Positional row indices of every group, computed once for all columns.
    group_positions = list(df.groupby(group_cols).indices.values())
    for col in columns:
        raw = df[col].to_numpy()
        result = np.array(raw, dtype=float)
        for positions in group_positions:
            result[positions] = _filter(raw[positions])
        smoothed_df[col] = result

    return smoothed_df

In [None]:
# calculating the resultant velocity and acceleration 
def calculate_resultant_velocity_acceleration(df):
    """Add the magnitudes of the COM velocity and acceleration to ``df``.

    ``com_v`` is the Euclidean norm of (``com_vx``, ``com_vy``) and ``com_a``
    the norm of (``com_ax``, ``com_ay``). ``df`` is modified in place and
    also returned.
    """
    # magnitude column -> its (x, y) component columns
    component_map = {
        'com_v': ('com_vx', 'com_vy'),
        'com_a': ('com_ax', 'com_ay'),
    }
    for magnitude_col, (x_col, y_col) in component_map.items():
        sum_of_squares = df[x_col]**2 + df[y_col]**2
        df[magnitude_col] = np.sqrt(sum_of_squares)
    return df

In [None]:
# Add the resultant (magnitude) columns com_v and com_a.
df = calculate_resultant_velocity_acceleration(df)

In [None]:
# Smooth the component and resultant columns with a Savitzky-Golay filter
# (11-sample window, quadratic fit). smooth_data returns a modified copy.
# NOTE(review): each column is filtered as one continuous series over the
# whole DataFrame, so windows that straddle a participant/boulder boundary mix
# frames from different groups -- confirm this is intended.
# NOTE(review): com_v / com_a are smoothed directly rather than recomputed
# from the smoothed components, so they may no longer equal the norm of the
# smoothed components -- confirm this is acceptable.
smoothed_columns = ['com_vx', 'com_vy', 'com_ax', 'com_ay', 'com_v', 'com_a']
df = smooth_data(df, smoothed_columns, window_size=11, polyorder=2)

In [None]:
# List all column names to confirm the resultant columns com_v and com_a are present.
print(df.columns.tolist())

In [None]:
# Keep only the frames of one participant on one boulder.
participant = 'Janja Garnbret'
boulder = 'W3'
filtered_df = df.loc[(df['participant'] == participant) & (df['boulder'] == boulder)]

# Two stacked panels: resultant velocity on top, resultant acceleration below.
plt.figure(figsize=(12, 6))

for panel, (column, quantity) in enumerate(
    (('com_v', 'Velocity'), ('com_a', 'Acceleration')), start=1
):
    plt.subplot(2, 1, panel)
    plt.plot(filtered_df['time(s)'], filtered_df[column], label=quantity)
    plt.xlabel('Time (s)')
    plt.ylabel(quantity)
    plt.title(f'Resultant {quantity} for {participant} on {boulder}')
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Write the preprocessed table to disk; index=False leaves the row index out
# of the CSV. The relative path is resolved against the working directory.
df.to_csv('../../data/dataframes/labels_and_coordinates_preprocessed.csv', index=False)

## 2 - Looking at visibility and presence per boulder

In [None]:
# Define the function to calculate average visibility and presence
def calculate_average_visibility_presence(df, boulder_id, landmarks):
    """Average the visibility and presence columns of landmarks on one boulder.

    Only rows whose ``boulder`` column equals ``boulder_id`` are considered.
    For each name in ``landmarks`` the means of the ``<name>_v`` (visibility)
    and ``<name>_p`` (presence) columns are taken.

    Returns a dict mapping each landmark name to
    ``{'average_visibility': ..., 'average_presence': ...}``.
    """
    on_boulder = df.loc[df['boulder'] == boulder_id]

    return {
        name: {
            'average_visibility': on_boulder[f'{name}_v'].mean(),
            'average_presence': on_boulder[f'{name}_p'].mean(),
        }
        for name in landmarks
    }

# Landmarks whose visibility/presence should be summarised
landmarks = ['left_knee', 'right_knee', 'left_ankle', 'right_ankle']

# Every distinct boulder found in the data
boulders = df['boulder'].unique()

# Per-boulder averages, keyed by boulder id
boulder_averages = {
    boulder_id: calculate_average_visibility_presence(df, boulder_id, landmarks)
    for boulder_id in boulders
}

# Report one header line per boulder, then one line per landmark
for boulder, averages in boulder_averages.items():
    print(f"Boulder: {boulder}")
    for landmark, values in averages.items():
        print(f"  {landmark}: Average Visibility = {values['average_visibility']}, Average Presence = {values['average_presence']}")