In [12]:
import pandas as pd
import gcsfs
import os

# Google Cloud Storage path
gcs_bucket_path = "gs://exercise-recognition-dataset/processed/"

try:
    # Initialize GCS filesystem
    fs = gcsfs.GCSFileSystem()

    # Get list of all CSV files in the bucket
    all_files = fs.ls(gcs_bucket_path)
    csv_files = [f"gs://{file}" for file in all_files if file.endswith(".csv")]  # Prepend gs://

    # Create a dictionary to store DataFrames
    dfs = {}

    # Loop through each CSV file and load it into a DataFrame
    for csv_file in csv_files:
        file_name = os.path.basename(csv_file)  # Extract filename
        df_name = file_name.replace(".csv", "")  # Create DataFrame name
        try:
            with fs.open(csv_file) as f:
                df = pd.read_csv(f)
                dfs[df_name] = df
            print(f"Successfully loaded {file_name} from GCS.")

            # Save the DataFrame to /content/
            output_path = f"/content/{file_name}"
            df.to_csv(output_path, index=False)  # Save DataFrame to local file
            print(f"Successfully saved {file_name} to /content/")

        except Exception as e:
            print(f"Error loading {file_name} from GCS: {e}")

except Exception as e:
    print(f"An error occurred: {e}")

Successfully loaded full_pose_dataset.csv from GCS.
Successfully saved full_pose_dataset.csv to /content/
Successfully loaded normalized_pose3d_dataset.csv from GCS.
Successfully saved normalized_pose3d_dataset.csv to /content/
Successfully loaded pose_joint_angles_dataset.csv from GCS.
Successfully saved pose_joint_angles_dataset.csv to /content/


In [13]:
# Importing necessary libraries
import numpy as np

# Loading the csv data
try:
    df = pd.read_csv('full_pose_dataset.csv')
except:
  print("File not found")

# Printing the first 5 rows of the data
df.head()

Unnamed: 0,frame,x0,y0,z0,v0,x1,y1,z1,v1,x2,...,x31,y31,z31,v31,x32,y32,z32,v32,label,video
0,frame_00009.jpg,0.551812,0.574754,-0.038027,0.998848,0.545737,0.564036,-0.038498,0.998614,0.549707,...,0.906977,0.586598,0.061681,0.340017,0.897279,0.616264,0.172381,0.171422,Kettlebell_swings,kettlebell_swings_8kg_2_102.csv
1,frame_00000.jpg,0.432927,0.46681,-0.205097,0.983814,0.437848,0.455041,-0.219827,0.984246,0.438948,...,0.419005,0.919513,0.210107,0.935987,0.43867,0.872235,0.398581,0.741659,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv
2,frame_00001.jpg,0.400312,0.493836,-0.132653,0.956673,0.404501,0.481483,-0.14879,0.962436,0.405526,...,0.417578,0.923985,0.18002,0.84339,0.44012,0.836672,0.431205,0.497793,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv
3,frame_00002.jpg,0.392901,0.511545,-0.131369,0.99653,0.395103,0.500852,-0.147117,0.997596,0.395558,...,0.413057,0.920415,0.102007,0.973134,0.405331,0.754452,0.482915,0.577804,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv
4,frame_00003.jpg,0.426719,0.472809,-0.170183,0.989553,0.431108,0.462486,-0.183992,0.989612,0.432652,...,0.417033,0.916374,0.181423,0.826518,0.419367,0.900731,0.32642,0.517659,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv


## Understanding Pose Landmark Indices

Before proceeding with feature extraction and engineering, it's crucial to understand the indices used to represent the pose landmarks in this dataset. These landmarks are based on the MediaPipe Pose model, which provides 33 3D coordinates representing different body joints and facial features.  For more detailed information, refer to the official MediaPipe documentation: [https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker](https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker)

Here's a comprehensive list of the landmark indices and their corresponding body parts:

*   **0: NOSE** - Nose tip
*   **1: LEFT_EYE_INNER** - Inner corner of the left eye
*   **2: LEFT_EYE** - Center of the left eye
*   **3: LEFT_EYE_OUTER** - Outer corner of the left eye
*   **4: RIGHT_EYE_INNER** - Inner corner of the right eye
*   **5: RIGHT_EYE** - Center of the right eye
*   **6: RIGHT_EYE_OUTER** - Outer corner of the right eye
*   **7: LEFT_EAR** - Left ear
*   **8: RIGHT_EAR** - Right ear
*   **9: MOUTH_LEFT** - Left corner of the mouth
*   **10: MOUTH_RIGHT** - Right corner of the mouth
*   **11: LEFT_SHOULDER** - Left shoulder
*   **12: RIGHT_SHOULDER** - Right shoulder
*   **13: LEFT_ELBOW** - Left elbow
*   **14: RIGHT_ELBOW** - Right elbow
*   **15: LEFT_WRIST** - Left wrist
*   **16: RIGHT_WRIST** - Right wrist
*   **17: LEFT_PINKY** - Left pinky fingertip
*   **18: RIGHT_PINKY** - Right pinky fingertip
*   **19: LEFT_INDEX** - Left index fingertip
*   **20: RIGHT_INDEX** - Right index fingertip
*   **21: LEFT_THUMB** - Left thumb fingertip
*   **22: RIGHT_THUMB** - Right thumb fingertip
*   **23: LEFT_HIP** - Left hip
*   **24: RIGHT_HIP** - Right hip
*   **25: LEFT_KNEE** - Left knee
*   **26: RIGHT_KNEE** - Right knee
*   **27: LEFT_ANKLE** - Left ankle
*   **28: RIGHT_ANKLE** - Right ankle
*   **29: LEFT_HEEL** - Left heel
*   **30: RIGHT_HEEL** - Right heel
*   **31: LEFT_FOOT_INDEX** - Left foot index tip
*   **32: RIGHT_FOOT_INDEX** - Right foot index tip

Each landmark is represented by x, y, and z coordinates, indicating its 3D position. Understanding these indices is essential for accessing specific landmarks and engineering relevant features for your machine learning models.  For example, `LEFT_SHOULDER` corresponds to index `11`.

In [24]:
# Custom function for calculations
def normalize_and_scale_pose_data(df):
    """
    Normalizes and scales pose data based on hip center and shoulder width.

    Args:
        df (pd.DataFrame): DataFrame containing pose data with columns 'x{i}', 'y{i}', 'z{i}' for keypoints.
                           It is assumed that the DataFrame also contains 'label', 'video', and 'frame' columns.

    Returns:
        pd.DataFrame: A new DataFrame with normalized and scaled pose coordinates, along with 'label', 'video_name', and 'frame_id' columns.
    """
    # Centering based on mid-hip point (x, y, z)
    df["hip_center_x"] = (df["x23"] + df["x24"]) / 2  # left_hip, right_hip
    df["hip_center_y"] = (df["y23"] + df["y24"]) / 2
    df["hip_center_z"] = (df["z23"] + df["z24"]) / 2

    # Normalize: subtract center for each keypoint
    keypoints = list(range(33))

    x_norm_cols = []
    y_norm_cols = []
    z_norm_cols = []

    for i in keypoints:
        x_norm_cols.append(df[f"x{i}"] - df["hip_center_x"])
        y_norm_cols.append(df[f"y{i}"] - df["hip_center_y"])
        z_norm_cols.append(df[f"z{i}"] - df["hip_center_z"])

    df["shoulder_width"] = np.sqrt(
        (df["x11"] - df["x12"]) ** 2 +
        (df["y11"] - df["y12"]) ** 2 +
        (df["z11"] - df["z12"]) ** 2
    )
    df["shoulder_width"] = df["shoulder_width"].replace(0, np.nan)

    x_scaled_cols = []
    y_scaled_cols = []
    z_scaled_cols = []
    for i in keypoints:
        x_scaled_cols.append((x_norm_cols[i] / df["shoulder_width"]))
        y_scaled_cols.append((y_norm_cols[i] / df["shoulder_width"]))
        z_scaled_cols.append((z_norm_cols[i] / df["shoulder_width"]))

    # Create new column names
    x_norm_names = [f"x{i}_norm" for i in keypoints]
    y_norm_names = [f"y{i}_norm" for i in keypoints]
    z_norm_names = [f"z{i}_norm" for i in keypoints]
    x_scaled_names = [f"x{i}_scaled" for i in keypoints]
    y_scaled_names = [f"y{i}_scaled" for i in keypoints]
    z_scaled_names = [f"z{i}_scaled" for i in keypoints]

    # Assign the new columns
    df = pd.concat([df,
                    pd.DataFrame({n: col for n, col in zip(x_norm_names, x_norm_cols)}),
                    pd.DataFrame({n: col for n, col in zip(y_norm_names, y_norm_cols)}),
                    pd.DataFrame({n: col for n, col in zip(z_norm_names, z_norm_cols)}),
                    pd.DataFrame({n: col for n, col in zip(x_scaled_names, x_scaled_cols)}),
                    pd.DataFrame({n: col for n, col in zip(y_scaled_names, y_scaled_cols)}),
                    pd.DataFrame({n: col for n, col in zip(z_scaled_names, z_scaled_cols)})
                    ], axis=1)

    # Keep only normalized coordinates + label
    scaled_cols = [f"{axis}{i}_scaled" for i in keypoints for axis in ['x', 'y', 'z']]
    df_final = df[scaled_cols + ['label']].copy() # Make a copy to avoid the warning

    # Joining the video and frame columns for infographics
    df_final['video_name'] = df['video']
    df_final['frame_id'] = df['frame']

    return df_final

# Load your data
full_pose_df = pd.read_csv("full_pose_dataset.csv")

# Process the data
df_final = normalize_and_scale_pose_data(full_pose_df)

# Save
df_final.to_csv("normalized_pose3d_dataset.csv", index=False)
print("Saved 3D normalized pose dataset to 'normalized_pose3d_dataset.csv'")

Saved 3D normalized pose dataset to 'normalized_pose3d_dataset.csv'


In [25]:
# Reviewing the results for the scaling and standardization
df_final.head()

Unnamed: 0,x0_scaled,y0_scaled,z0_scaled,x1_scaled,y1_scaled,z1_scaled,x2_scaled,y2_scaled,z2_scaled,x3_scaled,...,z30_scaled,x31_scaled,y31_scaled,z31_scaled,x32_scaled,y32_scaled,z32_scaled,label,video_name,frame_id
0,-1.090559,0.141936,-0.251086,-1.130577,0.071325,-0.25419,-1.104423,0.004063,-0.254609,-1.116826,...,1.256112,1.249263,0.219968,0.405788,1.185377,0.415402,1.135079,Kettlebell_swings,kettlebell_swings_8kg_2_102.csv,frame_00009.jpg
1,-0.100866,-1.187598,-1.107988,-0.074272,-1.251208,-1.187603,-0.068324,-1.249698,-1.187732,-0.062523,...,2.202294,-0.176118,1.259278,1.136198,-0.069828,1.003739,2.154909,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00000.jpg
2,-0.368423,-0.924043,-0.638563,-0.348255,-0.98351,-0.71624,-0.343323,-0.982224,-0.716378,-0.338132,...,2.072849,-0.285308,1.146597,0.866578,-0.176795,0.726293,2.075723,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00001.jpg
3,-0.423062,-0.817075,-0.61013,-0.412833,-0.866744,-0.683284,-0.410718,-0.864767,-0.683319,-0.408718,...,2.203915,-0.329433,1.082213,0.473949,-0.365323,0.311281,2.24335,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00002.jpg
4,-0.178773,-1.176443,-0.902165,-0.155493,-1.231197,-0.975408,-0.147302,-1.228253,-0.975452,-0.138533,...,1.83666,-0.230152,1.176312,0.962824,-0.217769,1.093341,1.73192,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00003.jpg


In [26]:
# Function to compute angle at joint 'b' using 3D vectors
def compute_angle_3d(a, b, c):
    vec1 = a - b
    vec2 = c - b
    if np.allclose(vec1, np.zeros(3)) or np.allclose(vec2, np.zeros(3)):
        return 0.0  # or np.nan
    dot = np.dot(vec1, vec2)
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    cos_theta = dot / (norm1 * norm2 + 1e-6)
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    angle_rad = np.arccos(cos_theta)
    return np.degrees(angle_rad)

# Assuming df_final contains your scaled coordinates (x0_scaled, y0_scaled, z0_scaled, ..., x32_scaled, ...)
df = pd.read_csv("normalized_pose3d_dataset.csv")

# Ensure all scaled coordinate columns are floats
df = df.astype({col: 'float64' for col in df.columns if '_scaled' in col})

# Define joint triplets to compute angles
angle_features = {
    "left_elbow": (11, 13, 15),
    "right_elbow": (12, 14, 16),
    "left_shoulder": (13, 11, 23),
    "right_shoulder": (14, 12, 24),
    "left_knee": (23, 25, 27),
    "right_knee": (24, 26, 28),
    "left_hip": (11, 23, 25),
    "right_hip": (12, 24, 26)
}

# Compute each angle and store in new columns
angle_data = {}

for angle_name, (a_idx, b_idx, c_idx) in angle_features.items():
    angle_list = []
    for _, row in df.iterrows():
        try:
            a = np.array([float(row[f"x{a_idx}_scaled"]), float(row[f"y{a_idx}_scaled"]), float(row[f"z{a_idx}_scaled"])])
            b = np.array([float(row[f"x{b_idx}_scaled"]), float(row[f"y{b_idx}_scaled"]), float(row[f"z{b_idx}_scaled"])])
            c = np.array([float(row[f"x{c_idx}_scaled"]), float(row[f"y{c_idx}_scaled"]), float(row[f"z{c_idx}_scaled"])])
            angle = compute_angle_3d(a, b, c)
        except (ValueError, TypeError, KeyError):
            angle = np.nan
        angle_list.append(angle)
    angle_data[angle_name] = angle_list

# Convert the dictionary to a DataFrame
angle_df = pd.DataFrame(angle_data)
angle_df['label'] = df['label'].values

# Reviewing the data
angle_df.head()

Unnamed: 0,left_elbow,right_elbow,left_shoulder,right_shoulder,left_knee,right_knee,left_hip,right_hip,label
0,117.218065,156.373815,62.611731,93.72962,161.993283,149.129599,147.433581,122.580906,Kettlebell_swings
1,142.145983,96.627983,90.54983,42.48678,175.807961,173.478401,173.032439,156.542162,Kettlebell_swings
2,132.540424,143.331899,65.510193,35.207546,172.432744,169.53404,157.839868,143.841709,Kettlebell_swings
3,133.29593,133.691916,38.857494,21.241796,170.928282,165.120593,148.879765,120.987754,Kettlebell_swings
4,140.379019,85.734537,109.407607,65.831581,175.955606,172.173704,168.630181,158.178095,Kettlebell_swings


In [27]:
# Adding metadata
angle_df['frame_id'] = df['frame_id'].values
angle_df['video_name'] = df['video_name'].values

# Save the data
angle_df.to_csv("pose_joint_angles_dataset.csv", index=False)
print("saved to 'pose_joint_angles_dataset.csv'")

saved to 'pose_joint_angles_dataset.csv'


The import statements were modified to enhance runtime resilience and prevent application collapse.

In [28]:
# Load the normalized data
df = pd.read_csv('normalized_pose3d_dataset.csv')

In [29]:
df.head()  # First 5 rows of the dataset

Unnamed: 0,x0_scaled,y0_scaled,z0_scaled,x1_scaled,y1_scaled,z1_scaled,x2_scaled,y2_scaled,z2_scaled,x3_scaled,...,z30_scaled,x31_scaled,y31_scaled,z31_scaled,x32_scaled,y32_scaled,z32_scaled,label,video_name,frame_id
0,-1.090559,0.141936,-0.251086,-1.130577,0.071325,-0.25419,-1.104423,0.004063,-0.254609,-1.116826,...,1.256112,1.249263,0.219968,0.405788,1.185377,0.415402,1.135079,Kettlebell_swings,kettlebell_swings_8kg_2_102.csv,frame_00009.jpg
1,-0.100866,-1.187598,-1.107988,-0.074272,-1.251208,-1.187603,-0.068324,-1.249698,-1.187732,-0.062523,...,2.202294,-0.176118,1.259278,1.136198,-0.069828,1.003739,2.154909,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00000.jpg
2,-0.368423,-0.924043,-0.638563,-0.348255,-0.98351,-0.71624,-0.343323,-0.982224,-0.716378,-0.338132,...,2.072849,-0.285308,1.146597,0.866578,-0.176795,0.726293,2.075723,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00001.jpg
3,-0.423062,-0.817075,-0.61013,-0.412833,-0.866744,-0.683284,-0.410718,-0.864767,-0.683319,-0.408718,...,2.203915,-0.329433,1.082213,0.473949,-0.365323,0.311281,2.24335,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00002.jpg
4,-0.178773,-1.176443,-0.902165,-0.155493,-1.231197,-0.975408,-0.147302,-1.228253,-0.975452,-0.138533,...,1.83666,-0.230152,1.176312,0.962824,-0.217769,1.093341,1.73192,Kettlebell_swings,kettlebell_swings_6kg_4_114.csv,frame_00003.jpg


In [31]:
import re

# Define the distance function
def compute_pairwise_distance(df, i, j):
    xi, yi, zi = df[f'x{i}_scaled'], df[f'y{i}_scaled'], df[f'z{i}_scaled']
    xj, yj, zj = df[f'x{j}_scaled'], df[f'y{j}_scaled'], df[f'z{j}_scaled']
    return np.sqrt((xi - xj)**2 + (yi - yj)**2 + (zi - zj)**2)

# Identify keypoint indices
num_keypoints = len([col for col in df.columns if re.match(r'x\d+_scaled', col)])
keypoints = list(range(num_keypoints))

distance_features = pd.DataFrame()
distance_features['frame_id'] = df['frame_id']
distance_features['video_name'] = df['video_name']

# Shoulder Width (keypoints 11: left shoulder, 12: right shoulder)
distance_features['shoulder_width'] = compute_pairwise_distance(df, 11, 12)

# Hip Width (keypoints 23: left hip, 24: right hip)
distance_features['hip_width'] = compute_pairwise_distance(df, 23, 24)

# Arm lengths
distance_features['upper_arm_left'] = compute_pairwise_distance(df, 11, 13)
distance_features['upper_arm_right'] = compute_pairwise_distance(df, 12, 14)
distance_features['lower_arm_left'] = compute_pairwise_distance(df, 13, 15)
distance_features['lower_arm_right'] = compute_pairwise_distance(df, 14, 16)

# Leg lengths
distance_features['upper_leg_left'] = compute_pairwise_distance(df, 23, 25)
distance_features['upper_leg_right'] = compute_pairwise_distance(df, 24, 26)
distance_features['lower_leg_left'] = compute_pairwise_distance(df, 25, 27)
distance_features['lower_leg_right'] = compute_pairwise_distance(df, 26, 28)

In [32]:
# Reviewing `distance_features` to check whether the operation is performed correctly
distance_features.head()

Unnamed: 0,frame_id,video_name,shoulder_width,hip_width,upper_arm_left,upper_arm_right,lower_arm_left,lower_arm_right,upper_leg_left,upper_leg_right,lower_leg_left,lower_leg_right
0,frame_00009.jpg,kettlebell_swings_8kg_2_102.csv,1.0,0.671795,0.794283,0.972799,0.809215,0.806995,0.59216,0.562619,0.775291,0.807284
1,frame_00000.jpg,kettlebell_swings_6kg_4_114.csv,1.0,0.657055,0.421644,0.414544,0.283361,0.301168,0.995175,1.024615,0.952504,0.994267
2,frame_00001.jpg,kettlebell_swings_6kg_4_114.csv,1.0,0.643557,0.347546,0.4705,0.299515,0.21493,0.824979,0.957481,0.800343,0.892699
3,frame_00002.jpg,kettlebell_swings_6kg_4_114.csv,1.0,0.652112,0.480836,0.476549,0.512914,0.350398,0.6773,0.943854,0.671871,0.921888
4,frame_00003.jpg,kettlebell_swings_6kg_4_114.csv,1.0,0.649923,0.409679,0.401388,0.311661,0.286707,0.892539,0.905867,0.877087,0.882983


In [33]:
# Initialize symmetry features DataFrame
symmetry_features = pd.DataFrame()
# Add frame IDs
symmetry_features['frame_id'] = df['frame_id']
# Add video names
symmetry_features['video_name'] = df['video_name']

# Calculate upper arm symmetry
symmetry_features['symmetry_upper_arm'] = np.abs(
    distance_features['upper_arm_left'] - distance_features['upper_arm_right']
)
# Calculate lower arm symmetry
symmetry_features['symmetry_lower_arm'] = np.abs(
    distance_features['lower_arm_left'] - distance_features['lower_arm_right']
)
# Calculate upper leg symmetry
symmetry_features['symmetry_upper_leg'] = np.abs(
    distance_features['upper_leg_left'] - distance_features['upper_leg_right']
)
# Calculate lower leg symmetry
symmetry_features['symmetry_lower_leg'] = np.abs(
    distance_features['lower_leg_left'] - distance_features['lower_leg_right']
)

In [34]:
# Reviewing `symmetry_features` to check whether the operation is performed correctly
symmetry_features.head()

Unnamed: 0,frame_id,video_name,symmetry_upper_arm,symmetry_lower_arm,symmetry_upper_leg,symmetry_lower_leg
0,frame_00009.jpg,kettlebell_swings_8kg_2_102.csv,0.178517,0.00222,0.029541,0.031993
1,frame_00000.jpg,kettlebell_swings_6kg_4_114.csv,0.0071,0.017807,0.02944,0.041763
2,frame_00001.jpg,kettlebell_swings_6kg_4_114.csv,0.122954,0.084585,0.132502,0.092357
3,frame_00002.jpg,kettlebell_swings_6kg_4_114.csv,0.004287,0.162516,0.266553,0.250016
4,frame_00003.jpg,kettlebell_swings_6kg_4_114.csv,0.00829,0.024954,0.013328,0.005896


In [35]:
# Saving the datasets
distance_features.to_csv("pose3d_distance_features.csv", index=False)
symmetry_features.to_csv("pose3d_symmetry_features.csv", index=False)

Resolved runtime instability by restructuring import dependencies.  See code comments for details.

In [37]:
# Load normalized dataset
df = pd.read_csv('normalized_pose3d_dataset.csv')

# Sort by video and frame to maintain temporal order
df.sort_values(by=['video_name', 'frame_id'], inplace=True)
df.reset_index(drop=True, inplace=True)

# Extract keypoint indices from x*_scaled pattern
x_columns = [col for col in df.columns if re.match(r'x\d+_scaled', col)]
keypoint_indices = [int(re.search(r'\d+', col).group()) for col in x_columns]
num_keypoints = len(keypoint_indices)

# Function to compute temporal differences (velocity or acceleration)
def compute_temporal_diff(dataframe, group_keys, order_col, step=1, prefix='vel'):
    temporal_df = dataframe.copy()
    new_cols = []  # List to store new columns (Series)

    for axis in ['x', 'y', 'z']:
        for i in keypoint_indices:
            col_name = f'{axis}{i}_scaled'
            new_col = dataframe.groupby(group_keys)[col_name].diff(periods=step)
            new_col.name = f'{prefix}_{axis}{i}'  # Set the name of the Series
            new_cols.append(new_col)

    # Concatenate all new columns to the DataFrame at once
    temporal_df = pd.concat([temporal_df] + new_cols, axis=1)
    return temporal_df

vel_df = compute_temporal_diff(df, group_keys=['video_name'], order_col='frame_id', step=1, prefix='vel')

In [38]:
# Reviewing the `vel_df` to check whether the operation is performed correctly
vel_df.head()

Unnamed: 0,x0_scaled,y0_scaled,z0_scaled,x1_scaled,y1_scaled,z1_scaled,x2_scaled,y2_scaled,z2_scaled,x3_scaled,...,vel_z23,vel_z24,vel_z25,vel_z26,vel_z27,vel_z28,vel_z29,vel_z30,vel_z31,vel_z32
0,-0.29023,-0.908041,-1.103772,-0.272303,-0.967773,-1.157645,-0.261884,-0.966418,-1.15739,-0.250042,...,,,,,,,,,,
1,-0.299645,-0.876496,-1.323764,-0.282579,-0.938997,-1.383577,-0.272361,-0.938779,-1.383261,-0.260979,...,-0.0002,0.0002,0.476903,0.636635,0.360294,0.749022,0.342277,0.755543,0.30835,0.776221
2,-0.28416,-0.873014,-1.43261,-0.266014,-0.930987,-1.481315,-0.254959,-0.932323,-1.48112,-0.243228,...,-0.001689,0.001689,0.086886,-0.20221,0.173708,-0.264227,0.182546,-0.267101,0.226944,-0.261503
3,-0.246819,-0.866061,-1.383165,-0.227279,-0.922747,-1.444332,-0.216048,-0.920523,-1.444169,-0.203206,...,0.008355,-0.008355,-0.134447,-0.332089,-0.001147,-0.387,0.020437,-0.388903,0.085602,-0.378223
4,-0.272222,-0.927351,-1.527996,-0.250786,-0.984679,-1.585891,-0.238511,-0.98404,-1.58554,-0.224116,...,-0.006573,0.006573,-0.25858,0.920072,-0.439772,1.232824,-0.456271,1.263031,-0.595112,1.25526


In [40]:
acc_df = compute_temporal_diff(vel_df, group_keys=['video_name'], order_col='frame_id', step=1, prefix='acc')

# Reviewing the `acc_df` to check whether the operation is performed correctly
acc_df.head()

Unnamed: 0,x0_scaled,y0_scaled,z0_scaled,x1_scaled,y1_scaled,z1_scaled,x2_scaled,y2_scaled,z2_scaled,x3_scaled,...,acc_z23,acc_z24,acc_z25,acc_z26,acc_z27,acc_z28,acc_z29,acc_z30,acc_z31,acc_z32
0,-0.29023,-0.908041,-1.103772,-0.272303,-0.967773,-1.157645,-0.261884,-0.966418,-1.15739,-0.250042,...,,,,,,,,,,
1,-0.299645,-0.876496,-1.323764,-0.282579,-0.938997,-1.383577,-0.272361,-0.938779,-1.383261,-0.260979,...,-0.0002,0.0002,0.476903,0.636635,0.360294,0.749022,0.342277,0.755543,0.30835,0.776221
2,-0.28416,-0.873014,-1.43261,-0.266014,-0.930987,-1.481315,-0.254959,-0.932323,-1.48112,-0.243228,...,-0.001689,0.001689,0.086886,-0.20221,0.173708,-0.264227,0.182546,-0.267101,0.226944,-0.261503
3,-0.246819,-0.866061,-1.383165,-0.227279,-0.922747,-1.444332,-0.216048,-0.920523,-1.444169,-0.203206,...,0.008355,-0.008355,-0.134447,-0.332089,-0.001147,-0.387,0.020437,-0.388903,0.085602,-0.378223
4,-0.272222,-0.927351,-1.527996,-0.250786,-0.984679,-1.585891,-0.238511,-0.98404,-1.58554,-0.224116,...,-0.006573,0.006573,-0.25858,0.920072,-0.439772,1.232824,-0.456271,1.263031,-0.595112,1.25526


### Handling NaNs in Temporal Features

When computing velocity and acceleration using `.diff()`, the first frame in each video has no previous frame to compare, resulting in `NaN` values.

To ensure smooth downstream processing and prevent data loss, we replace these `NaN`s with `0`

In [41]:
vel_df.fillna(0, inplace=True)
acc_df.fillna(0, inplace=True)

In [42]:
# Summary statistics
def summarize_motion_features(df, prefix):
    summary = pd.DataFrame()
    summary['frame_id'] = df['frame_id']
    summary['video_name'] = df['video_name']

    for i in keypoint_indices:
        v_x, v_y, v_z = df[f'{prefix}_x{i}'], df[f'{prefix}_y{i}'], df[f'{prefix}_z{i}']
        summary[f'{prefix}_norm_{i}'] = np.sqrt(v_x**2 + v_y**2 + v_z**2)

    # Optionally compute aggregate metrics like mean, max, std across all keypoints
    summary[f'{prefix}_mean_norm'] = summary[[col for col in summary.columns if f'{prefix}_norm_' in col]].mean(axis=1)
    return summary

vel_summary = summarize_motion_features(vel_df, 'vel')
acc_summary = summarize_motion_features(acc_df, 'acc')

In [43]:
print(vel_summary.head())
print(acc_summary.head())

          frame_id               video_name  vel_norm_0  vel_norm_1  \
0  frame_00000.jpg  Bench_Press_100_111.csv    0.000000    0.000000   
1  frame_00001.jpg  Bench_Press_100_111.csv    0.222441    0.227989   
2  frame_00002.jpg  Bench_Press_100_111.csv    0.109997    0.099455   
3  frame_00003.jpg  Bench_Press_100_111.csv    0.062349    0.054185   
4  frame_00004.jpg  Bench_Press_100_111.csv    0.159304    0.156291   

   vel_norm_2  vel_norm_3  vel_norm_4  vel_norm_5  vel_norm_6  vel_norm_7  \
0    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000   
1    0.227798    0.227581    0.228012    0.228427    0.228970    0.211878   
2    0.099603    0.099689    0.108793    0.108757    0.108503    0.044058   
3    0.054943    0.056548    0.059503    0.060360    0.061144    0.053422   
4    0.156604    0.156532    0.153559    0.152851    0.152211    0.103482   

   ...  vel_norm_24  vel_norm_25  vel_norm_26  vel_norm_27  vel_norm_28  \
0  ...     0.000000     0.000000   

In [44]:
# Saving the velocity and acceleration dataset
vel_summary.to_csv("pose3d_velocity_features.csv", index=False)
acc_summary.to_csv("pose3d_acceleration_features.csv", index=False)