# Baseline Model

In [322]:
import numpy as np
import pandas as pd
import joblib
from tqdm import tqdm

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import balanced_accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline


In [323]:
# Load data
X_train = joblib.load("Data/Xtrain2.pkl")
Y_train = np.load('Data/Ytrain2.npy')

print(X_train.shape)
print(Y_train.shape)

(444, 3)
(14,)


In [324]:
# FILTERING OPTIONS - MODIFY THESE TO REMOVE EXERCISES OR PATIENTS

# Option 1: Remove specific exercises
# exercises_to_remove = ['E1', 'E2']  # Remove exercises E1 and E2
exercises_to_remove = []  # Keep all exercises

# Option 2: Remove specific patients
# patients_to_remove = [1, 5, 10]  # Remove patients 1, 5, and 10
patients_to_remove = []  # Keep all patients

In [325]:
# REVERSE MASKING CONFIGURATION - Specify which features to KEEP
EXERCISE_FEATURES_TO_KEEP_20_BEST = {
    # E1: Brushing hair - Keep only the top features from analysis
    'E1': {
        # Format: {keypoint_index: [components_to_keep]}
        23: ['mean_x', 'mean_y'],  # right_hip
        25: ['mean_x'],            # left_knee (only mean_x was in top)
        14: ['mean_x'],            # right_elbow (only mean_x was in top)
        26: ['mean_x', 'mean_y'],  # right_knee
        24: ['mean_x'],            # left_hip (only mean_x was in top)
        25: ['var_y'],             # left_knee_var_y
        21: ['mean_x'],            # left_thumb
        19: ['mean_x'],            # left_index
        23: ['mean_y'],            # right_hip_mean_y
        15: ['mean_x'],            # left_wrist
        17: ['mean_x'],            # left_pinky
        29: ['mean_x'],            # left_heel
        26: ['var_x'],             # right_knee_var_x
        28: ['var_x'],             # right_ankle_var_x
        30: ['var_x'],             # right_heel_var_x
        11: ['var_x'],             # left_shoulder_var_x
        26: ['mean_y'],            # right_knee_mean_y
        29: ['var_x'],             # left_heel_var_x
        27: ['mean_x'],            # left_ankle
        30: ['mean_x'],            # right_heel
    },
    
    # E2: Brushing teeth - Keep only top features
    'E2': {
        26: ['mean_x'],            # right_knee_mean_x
        12: ['mean_y'],            # right_shoulder_mean_y
        23: ['mean_y'],            # right_hip_mean_y
        14: ['mean_x'],            # right_elbow_mean_x? (check index)
        23: ['mean_x'],            # right_hip_mean_x
        21: ['mean_x'],            # left_thumb_mean_x
        24: ['mean_x'],            # left_hip_mean_x
        26: ['mean_y'],            # right_knee_mean_y
        15: ['mean_x'],            # left_wrist_mean_x
        24: ['mean_y'],            # left_hip_mean_y
        20: ['mean_x'],            # right_index_mean_x
        22: ['mean_x'],            # right_thumb_mean_x
        19: ['mean_x'],            # left_index_mean_x
        17: ['mean_x'],            # left_pinky_mean_x
        30: ['mean_x'],            # right_heel_mean_x
        16: ['mean_x'],            # right_wrist_mean_x
        31: ['mean_y'],            # left_foot_index_mean_y
        18: ['mean_x'],            # right_pinky_mean_x
        21: ['var_y'],             # left_thumb_var_y
        23: ['var_x'],             # right_hip_var_x
    },
    
    # E3: Keep top features
    'E3': {
        25: ['var_y'],             # left_knee_var_y
        23: ['mean_x'],            # right_hip_mean_x
        32: ['mean_y'],            # right_foot_index_mean_y
        25: ['mean_x'],            # left_knee_mean_x
        24: ['mean_x'],            # left_hip_mean_x
        26: ['mean_x'],            # right_knee_mean_x
        32: ['var_x'],             # right_foot_index_var_x
        28: ['mean_y'],            # right_ankle_mean_y
        30: ['mean_y'],            # right_heel_mean_y
        19: ['mean_x'],            # left_index_mean_x
        11: ['mean_x'],            # left_shoulder_mean_x
        25: ['mean_y'],            # left_knee_mean_y
        26: ['mean_y'],            # right_knee_mean_y
        7: ['var_y'],              # left_ear_var_y
        32: ['mean_x'],            # right_foot_index_mean_x
        20: ['mean_x'],            # right_index_mean_x
        0: ['var_x'],              # nose_var_x
        8: ['var_x'],              # right_ear_var_x
        27: ['var_x'],             # left_ankle_var_x
        21: ['mean_x'],            # left_thumb_mean_x
    },
    
    # E4: Keep top features
    'E4': {
        31: ['var_x'],             # left_foot_index_var_x
        25: ['mean_x'],            # left_knee_mean_x
        14: ['mean_x'],            # right_elbow_mean_x
        30: ['mean_y'],            # right_heel_mean_y
        31: ['var_y'],             # left_foot_index_var_y
        28: ['mean_y'],            # right_ankle_mean_y
        25: ['var_y'],             # left_knee_var_y
        20: ['var_x'],             # right_index_var_x
        23: ['mean_y'],            # right_hip_mean_y
        26: ['mean_x'],            # right_knee_mean_x
        19: ['mean_x'],            # left_index_mean_x
        30: ['var_y'],             # right_heel_var_y
        24: ['mean_y'],            # left_hip_mean_y
        19: ['mean_y'],            # left_index_mean_y
        17: ['var_x'],             # left_pinky_var_x
        13: ['mean_x'],            # left_elbow_mean_x
        28: ['var_y'],             # right_ankle_var_y
        29: ['var_y'],             # left_heel_var_y
        17: ['mean_x'],            # left_pinky_mean_x
        19: ['var_y'],             # left_index_var_y
    },
    
    # E5: Hip flexion - Keep top features
    'E5': {
        25: ['var_x'],             # left_knee_var_x
        26: ['mean_y'],            # right_knee_mean_y
        20: ['mean_x'],            # right_index_mean_x
        32: ['var_x'],             # right_foot_index_var_x
        25: ['mean_x'],            # left_knee_mean_x
        30: ['var_x'],             # right_heel_var_x
        23: ['var_x'],             # right_hip_var_x
        23: ['mean_y'],            # right_hip_mean_y
        22: ['mean_x'],            # right_thumb_mean_x
        18: ['mean_x'],            # right_pinky_mean_x
        24: ['var_x'],             # left_hip_var_x
        28: ['var_x'],             # right_ankle_var_x
        16: ['mean_x'],            # right_wrist_mean_x
        25: ['mean_y'],            # left_knee_mean_y
        24: ['mean_y'],            # left_hip_mean_y
        13: ['mean_y'],            # left_elbow_mean_y
        15: ['mean_y'],            # left_wrist_mean_y
        28: ['mean_y'],            # right_ankle_mean_y
        8: ['var_y'],              # right_ear_var_y
        26: ['var_y'],             # right_knee_var_y
    },
}

In [326]:
# OPTIMIZED MASKING WITH TOP 30 FEATURES PER EXERCISE
EXERCISE_FEATURES_TO_KEEP_30_BEST = {
    # E1: Brushing hair - Keep top 30 features
    'E1': {
        23: ['mean_x', 'mean_y'],  # right_hip (48,49)
        25: ['mean_x', 'var_y'],   # left_knee (50,117)
        14: ['mean_x'],            # right_elbow (28)
        26: ['mean_x', 'mean_y'],  # right_knee (52,53)
        24: ['mean_x', 'mean_y'],  # left_hip (46,47)
        21: ['mean_x'],            # left_thumb (42)
        19: ['mean_x'],            # left_index (38)
        15: ['mean_x'],            # left_wrist (30)
        17: ['mean_x'],            # left_pinky (34)
        29: ['mean_x', 'var_x', 'var_y'],  # left_heel (58,124,125)
        26: ['var_x'],             # right_knee_var_x (118)
        28: ['var_x'],             # right_ankle_var_x (122)
        30: ['var_x'],             # right_heel_var_x (126)
        11: ['var_x', 'var_y'],    # left_shoulder (88,89)
        27: ['mean_x', 'mean_y', 'var_y'], # left_ankle (54,55,121)
        30: ['mean_x'],            # right_heel_mean_x (60)
        16: ['mean_x'],            # right_wrist_mean_x (32)
        22: ['mean_x'],            # right_thumb_mean_x (44)
        28: ['mean_x'],            # right_ankle_mean_x (56)
        11: ['mean_y'],            # left_shoulder_mean_y (23)
    },
    
    # E2: Brushing teeth - Keep top 30 features
    'E2': {
        26: ['mean_x', 'mean_y'],  # right_knee (52,53)
        12: ['mean_y'],            # right_shoulder (25)
        23: ['mean_x', 'mean_y', 'var_x'], # right_hip (48,49,114)
        13: ['mean_x'],            # left_elbow (26)
        21: ['mean_x', 'var_y'],   # left_thumb (42,109)
        24: ['mean_x', 'mean_y', 'var_x'], # left_hip (46,47,112)
        15: ['mean_x'],            # left_wrist (30)
        20: ['mean_x'],            # right_index (40)
        22: ['mean_x'],            # right_thumb (44)
        19: ['mean_x'],            # left_index (38)
        17: ['mean_x'],            # left_pinky (34)
        30: ['mean_x'],            # right_heel (60)
        16: ['mean_x'],            # right_wrist (32)
        31: ['mean_y'],            # left_foot_index (63)
        18: ['mean_x'],            # right_pinky (36)
        15: ['var_y'],             # left_wrist_var_y (97)
        25: ['mean_y'],            # left_knee_mean_y (51)
        19: ['var_y'],             # left_index_var_y (105)
        32: ['var_y'],             # right_foot_index_var_y (131)
        17: ['var_y'],             # left_pinky_var_y (101)
        31: ['var_y'],             # left_foot_index_var_y (129)
        12: ['var_x'],             # right_shoulder_var_x (90)
        14: ['mean_y'],            # right_elbow_mean_y (29)
        28: ['mean_x'],            # right_ankle_mean_x (56)
    },
    
    # E3: Keep top 30 features
    'E3': {
        25: ['var_y', 'mean_x', 'mean_y'], # left_knee (117,50,51)
        23: ['mean_x', 'mean_y'],  # right_hip (48,49)
        32: ['mean_y', 'mean_x', 'var_x'], # right_foot_index (65,64,130)
        24: ['mean_x', 'var_x'],   # left_hip (46,112)
        26: ['mean_x', 'mean_y', 'var_y'], # right_knee (52,53,119)
        28: ['mean_y'],            # right_ankle_mean_y (57)
        30: ['mean_y'],            # right_heel_mean_y (61)
        19: ['mean_x'],            # left_index (38)
        11: ['mean_x', 'var_x'],   # left_shoulder (22,88)
        7: ['var_y'],              # left_ear_var_y (81)
        8: ['var_x', 'var_y'],     # right_ear (82,83)
        27: ['var_x'],             # left_ankle_var_x (120)
        21: ['mean_x'],            # left_thumb (42)
        18: ['mean_x'],            # right_pinky (36)
        0: ['var_x'],              # nose_var_x (66)
        9: ['var_x'],              # mouth_right_var_x (86)
        17: ['mean_x'],            # left_pinky (34)
        31: ['mean_y'],            # left_foot_index_mean_y (63)
    },
    
    # E4: Keep top 30 features
    'E4': {
        31: ['var_x', 'var_y'],    # left_foot_index (128,129)
        25: ['mean_x', 'var_y'],   # left_knee (50,117)
        14: ['mean_x'],            # right_elbow (28)
        30: ['mean_y', 'var_y'],   # right_heel (61,127)
        28: ['mean_y', 'var_y'],   # right_ankle (57,123)
        20: ['var_x'],             # right_index_var_x (106)
        23: ['mean_y'],            # right_hip_mean_y (49)
        26: ['mean_x'],            # right_knee_mean_x (52)
        19: ['mean_x', 'mean_y', 'var_y'], # left_index (38,39,105)
        17: ['mean_x', 'mean_y', 'var_x'], # left_pinky (34,35,100)
        13: ['mean_x'],            # left_elbow (26)
        21: ['mean_x'],            # left_thumb (42)
        13: ['var_x', 'var_y'],    # left_elbow (92,93)
        31: ['mean_y'],            # left_foot_index_mean_y (63)
        32: ['mean_x'],            # right_foot_index_mean_x (64)
        21: ['var_x'],             # left_thumb_var_x (108)
        16: ['mean_y'],            # right_wrist_mean_y (33)
        23: ['var_x'],             # right_hip_var_x (114)
        12: ['var_y'],             # right_shoulder_var_y (91)
    },
    
    # E5: Hip flexion - Keep top 30 features
    'E5': {
        25: ['var_x', 'mean_x', 'mean_y'], # left_knee (116,50,51)
        26: ['mean_y', 'var_y'],   # right_knee (53,119)
        20: ['mean_x'],            # right_index (40)
        32: ['var_x'],             # right_foot_index_var_x (130)
        30: ['var_x'],             # right_heel_var_x (126)
        23: ['var_x', 'mean_y'],   # right_hip (114,49)
        22: ['mean_x'],            # right_thumb (44)
        18: ['mean_x'],            # right_pinky (36)
        24: ['var_x', 'mean_y'],   # left_hip (112,47)
        28: ['var_x', 'mean_y'],   # right_ankle (122,57)
        16: ['mean_x'],            # right_wrist (32)
        13: ['mean_y'],            # left_elbow_mean_y (27)
        15: ['mean_y'],            # left_wrist_mean_y (31)
        8: ['var_y'],              # right_ear_var_y (83)
        14: ['mean_y'],            # right_elbow_mean_y (29)
        0: ['mean_x'],             # nose_mean_x (0)
        6: ['mean_x'],             # right_eye_outer_mean_x (12)
        5: ['mean_x'],             # right_eye_mean_x (10)
        9: ['mean_x'],             # mouth_right_mean_x (20)
        19: ['mean_x'],            # left_index (38)
        4: ['mean_x'],             # right_eye_inner_mean_x (8)
        1: ['mean_x'],             # left_eye_inner_mean_x (2)
        21: ['mean_y'],            # left_thumb_mean_y (43)
        29: ['mean_x'],            # left_heel_mean_x (58)
    },
}

In [327]:
# OPTIMIZED MASKING WITH TOP 40 FEATURES PER EXERCISE
EXERCISE_FEATURES_TO_KEEP_40_BEST = {
    # E1: Brushing hair - Keep top 40 features
    'E1': {
        23: ['mean_x', 'mean_y', 'var_x'],  # right_hip (48,49,114)
        25: ['mean_x', 'mean_y', 'var_y'],  # left_knee (50,51,117)
        14: ['mean_x'],                     # right_elbow (28)
        26: ['mean_x', 'mean_y', 'var_x'],  # right_knee (52,53,118)
        24: ['mean_x', 'mean_y', 'var_x'],  # left_hip (46,47,112)
        21: ['mean_x'],                     # left_thumb (42)
        19: ['mean_x', 'var_x'],            # left_index (38,104)
        15: ['mean_x'],                     # left_wrist (30)
        17: ['mean_x', 'var_x'],            # left_pinky (34,100)
        29: ['mean_x', 'mean_y', 'var_x', 'var_y'],  # left_heel (58,59,124,125)
        28: ['var_x', 'mean_x'],            # right_ankle (122,56)
        30: ['var_x', 'mean_x'],            # right_heel (126,60)
        11: ['var_x', 'var_y', 'mean_y'],   # left_shoulder (88,89,23)
        27: ['mean_x', 'mean_y', 'var_y'],  # left_ankle (54,55,121)
        16: ['mean_x'],                     # right_wrist (32)
        22: ['mean_x'],                     # right_thumb (44)
        20: ['mean_x'],                     # right_index (40)
        7: ['mean_x'],                      # left_ear (14)
        13: ['mean_y'],                     # left_elbow (27)
        18: ['mean_x'],                     # right_pinky (36)
    },
    
    # E2: Brushing teeth - Keep top 40 features
    'E2': {
        26: ['mean_x', 'mean_y'],           # right_knee (52,53)
        12: ['mean_y', 'var_x'],            # right_shoulder (25,90)
        23: ['mean_x', 'mean_y', 'var_x'],  # right_hip (48,49,114)
        13: ['mean_x'],                     # left_elbow (26)
        21: ['mean_x', 'var_y'],            # left_thumb (42,109)
        24: ['mean_x', 'mean_y', 'var_x'],  # left_hip (46,47,112)
        15: ['mean_x', 'var_y'],            # left_wrist (30,97)
        20: ['mean_x'],                     # right_index (40)
        22: ['mean_x'],                     # right_thumb (44)
        19: ['mean_x', 'mean_y', 'var_y'],  # left_index (38,39,105)
        17: ['mean_x', 'mean_y', 'var_y'],  # left_pinky (34,35,101)
        30: ['mean_x'],                     # right_heel (60)
        16: ['mean_x'],                     # right_wrist (32)
        31: ['mean_y', 'var_y'],            # left_foot_index (63,129)
        18: ['mean_x'],                     # right_pinky (36)
        14: ['mean_y'],                     # right_elbow (29)
        28: ['mean_x'],                     # right_ankle (56)
        7: ['var_y'],                       # left_ear (81)
        11: ['var_y'],                      # left_shoulder (89)
        27: ['mean_y'],                     # left_ankle (55)
        8: ['mean_y'],                      # right_ear (17)
        20: ['var_y'],                      # right_index (107)
        32: ['var_y'],                      # right_foot_index (131)
    },
    
    # E3: Keep top 40 features
    'E3': {
        25: ['var_y', 'mean_x', 'mean_y'],  # left_knee (117,50,51)
        23: ['mean_x', 'mean_y'],           # right_hip (48,49)
        32: ['mean_y', 'mean_x', 'var_x'],  # right_foot_index (65,64,130)
        24: ['mean_x', 'var_x'],            # left_hip (46,112)
        26: ['mean_x', 'mean_y', 'var_y'],  # right_knee (52,53,119)
        28: ['mean_y'],                     # right_ankle_mean_y (57)
        30: ['mean_y', 'mean_x'],           # right_heel (61,60)
        19: ['mean_x'],                     # left_index (38)
        11: ['mean_x', 'var_x'],            # left_shoulder (22,88)
        7: ['var_y', 'mean_x'],             # left_ear (81,14)
        8: ['var_x', 'var_y'],              # right_ear (82,83)
        27: ['var_x'],                      # left_ankle_var_x (120)
        21: ['mean_x'],                     # left_thumb (42)
        18: ['mean_x'],                     # right_pinky (36)
        0: ['var_x', 'mean_x'],             # nose (66,0)
        9: ['var_x'],                       # mouth_right (86)
        17: ['mean_x'],                     # left_pinky (34)
        31: ['mean_y', 'var_y'],            # left_foot_index (63,129)
        9: ['mean_x'],                      # mouth_left (18)
        5: ['mean_x'],                      # right_eye (20)
        13: ['mean_x'],                     # left_elbow (26)
        12: ['var_y'],                      # right_shoulder (91)
        29: ['var_x'],                      # left_heel (124)
        22: ['mean_x'],                     # right_thumb (44)
    },
    
    # E4: Keep top 40 features
    'E4': {
        31: ['var_x', 'var_y'],             # left_foot_index (128,129)
        25: ['mean_x', 'var_y', 'var_x'],   # left_knee (50,117,116)
        14: ['mean_x'],                     # right_elbow (28)
        30: ['mean_y', 'var_y'],            # right_heel (61,127)
        28: ['mean_y', 'var_y', 'var_x'],   # right_ankle (57,123,122)
        20: ['var_x', 'var_y'],             # right_index (106,107)
        23: ['mean_y', 'var_x'],            # right_hip (49,114)
        26: ['mean_x'],                     # right_knee_mean_x (52)
        19: ['mean_x', 'mean_y', 'var_y'],  # left_index (38,39,105)
        17: ['mean_x', 'mean_y', 'var_x'],  # left_pinky (34,35,100)
        13: ['mean_x', 'var_x', 'var_y'],   # left_elbow (26,92,93)
        21: ['mean_x', 'var_x'],            # left_thumb (42,108)
        31: ['mean_y'],                     # left_foot_index_mean_y (63)
        32: ['mean_x'],                     # right_foot_index_mean_x (64)
        16: ['mean_y', 'mean_x'],           # right_wrist (33,32)
        12: ['var_y'],                      # right_shoulder (91)
        11: ['var_x'],                      # left_shoulder (88)
        15: ['mean_x'],                     # left_wrist (30)
        15: ['var_x'],                      # left_wrist (96)
        20: ['mean_y'],                     # right_index (41)
        22: ['mean_y'],                     # right_thumb (45)
    },
    
    # E5: Hip flexion - Keep top 40 features
    'E5': {
        25: ['var_x', 'mean_x', 'mean_y', 'var_y'], # left_knee (116,50,51,117)
        26: ['mean_y', 'var_y', 'mean_x'], # right_knee (53,119,52)
        20: ['mean_x'],                    # right_index (40)
        32: ['var_x'],                     # right_foot_index_var_x (130)
        30: ['var_x', 'mean_y'],           # right_heel (126,61)
        23: ['var_x', 'mean_y'],           # right_hip (114,49)
        22: ['mean_x', 'var_x'],           # right_thumb (44,110)
        18: ['mean_x'],                    # right_pinky (36)
        24: ['var_x', 'mean_y'],           # left_hip (112,47)
        28: ['var_x', 'mean_y'],           # right_ankle (122,57)
        16: ['mean_x'],                    # right_wrist (32)
        13: ['mean_y'],                    # left_elbow_mean_y (27)
        15: ['mean_y', 'var_y'],           # left_wrist (31,97)
        8: ['var_y'],                      # right_ear_var_y (83)
        14: ['mean_y'],                    # right_elbow_mean_y (29)
        0: ['mean_x'],                     # nose_mean_x (0)
        6: ['mean_x'],                     # right_eye_outer_mean_x (12)
        5: ['mean_x'],                     # right_eye_mean_x (10)
        9: ['mean_x'],                     # mouth_right_mean_x (20)
        19: ['mean_x'],                    # left_index (38)
        4: ['mean_x'],                     # right_eye_inner_mean_x (8)
        1: ['mean_x'],                     # left_eye_inner_mean_x (2)
        21: ['mean_y', 'mean_x'],          # left_thumb (43,42)
        29: ['mean_x'],                    # left_heel_mean_x (58)
        3: ['mean_x'],                     # left_eye_outer_mean_x (6)
        17: ['mean_x', 'mean_y'],          # left_pinky (34,35)
    },
}

In [328]:
# Apply filters if specified
if exercises_to_remove:
    original_count = len(X_train)
    X_train = X_train[~X_train['Exercise_Id'].isin(exercises_to_remove)]
    print(f"Removed exercises {exercises_to_remove}. Sequences: {original_count} -> {len(X_train)}")

if patients_to_remove:
    original_count = len(X_train)
    X_train = X_train[~X_train['Patient_Id'].isin(patients_to_remove)]
    print(f"Removed patients {patients_to_remove}. Sequences: {original_count} -> {len(X_train)}")

# Get patient IDs from the filtered data
patient_ids = np.sort(X_train['Patient_Id'].unique())
print(f"Patient IDs in filtered data: {patient_ids}")

Patient IDs in filtered data: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14]


In [329]:
# Create patient-to-label mapping from the original Y_train
# We need to filter Y_train to only include patients that remain after filtering
all_patient_to_label = dict(zip(range(1, 15), Y_train))  # Original mapping for patients 1-14
patient_to_label = {pid: all_patient_to_label[pid] for pid in patient_ids}
Y_train_filtered = np.array([patient_to_label[pid] for pid in patient_ids])

print(f"Filtered data: {X_train.shape[0]} sequences, {len(patient_ids)} patients")
print(f"Class distribution: {np.sum(Y_train_filtered==0)} left vs {np.sum(Y_train_filtered==1)} right impaired")

Filtered data: 444 sequences, 14 patients
Class distribution: 9 left vs 5 right impaired


In [330]:
# Reverse feature extraction function - KEEP only specified features
def extract_features_keep_only(df):
    features_list = []
    
    for idx, row in df.iterrows():
        skeleton_seq = row['Skeleton_Sequence']
        exercise_id = row['Exercise_Id']
        
        # Get features to keep configuration for this exercise
        features_to_keep = EXERCISE_FEATURES_TO_KEEP_30_BEST.get(exercise_id, {})
        
        # Ensure skeleton_seq is 3D: (seq_length, 33, 2)
        if skeleton_seq.ndim == 2 and skeleton_seq.shape[1] == 66:
            skeleton_seq = skeleton_seq.reshape(skeleton_seq.shape[0], 33, 2)
        
        # Calculate means and variances for all keypoints first
        flattened = skeleton_seq.reshape(len(skeleton_seq), -1)  # (seq_length, 66)
        all_means = np.mean(flattened, axis=0)  # 66 features
        all_variances = np.var(flattened, axis=0)  # 66 features
        
        # Initialize with zeros (we'll only fill in the features we want to keep)
        final_means = np.zeros(66)
        final_variances = np.zeros(66)
        
        # Only keep the specified features, leave others as zero
        for kp_idx, components_to_keep in features_to_keep.items():
            # Each keypoint has 2 positions in the mean/variance arrays
            mean_x_idx = kp_idx * 2
            mean_y_idx = kp_idx * 2 + 1
            var_x_idx = kp_idx * 2
            var_y_idx = kp_idx * 2 + 1
            
            if 'mean_x' in components_to_keep:
                final_means[mean_x_idx] = all_means[mean_x_idx]
            if 'mean_y' in components_to_keep:
                final_means[mean_y_idx] = all_means[mean_y_idx]
            if 'var_x' in components_to_keep:
                final_variances[var_x_idx] = all_variances[var_x_idx]
            if 'var_y' in components_to_keep:
                final_variances[var_y_idx] = all_variances[var_y_idx]
        
        features_list.append(np.concatenate([final_means, final_variances]))
    
    return np.array(features_list)

In [331]:
# Extract features for all sequences
X_all_features = extract_features_keep_only(X_train)

# Prepare one-hot encoder for exercises
exercise_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
exercise_encoded = exercise_encoder.fit_transform(X_train[['Exercise_Id']])

# Combine features with exercise encoding
X_combined = np.concatenate([X_all_features, exercise_encoded], axis=1)

# Create sequence-level labels
patient_to_label = dict(zip(patient_ids, Y_train))
y_sequences = X_train['Patient_Id'].map(patient_to_label).values

# Store patient ID for each sequence
sequence_patient_ids = X_train['Patient_Id'].values

In [332]:
X_combined[0]

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -4.19843892e-01,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -2.21562543e-01,  0.00000000e+00,  2.10580915e-01,  0.00000000e+00,
       -1.08712225e-01,  0.00000000e+00,  1.89105874e-01,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  1.47100941e-01,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  1.92108549e-01,  0.00000000e+00,
       -8.43244232e-02,  0.00000000e+00,  1.01910927e-01, -3.83950593e-03,
       -1.00125355e-01,  1.69220185e-03,  2.76740440e-01,  0.00000000e+00,
        0.00000000e+00,  

In [333]:
# Cross-validation setup
n_splits = 1000
majority_accuracies = []
probability_accuracies = []

In [334]:
from tqdm import tqdm

for split in tqdm(range(n_splits), desc="Cross-validation splits", unit="split"):
    # Random stratified split: 2 left + 1 right patients for test
    rng = np.random.RandomState(42 + split)
    
    left_patients = patient_ids[Y_train == 0]  # Patients with label 0
    right_patients = patient_ids[Y_train == 1]  # Patients with label 1
    
    test_left = rng.choice(left_patients, size=2, replace=False)
    test_right = rng.choice(right_patients, size=1, replace=False)
    test_patients = np.concatenate([test_left, test_right])
    train_patients = np.setdiff1d(patient_ids, test_patients)
    
    # Create masks for sequences (much faster than filtering DataFrame)
    train_mask = np.isin(sequence_patient_ids, train_patients)
    test_mask = np.isin(sequence_patient_ids, test_patients)
    
    # Split the precomputed features
    X_train_split = X_combined[train_mask]
    X_test_split = X_combined[test_mask]
    y_train_split = y_sequences[train_mask]
    y_test_split = y_sequences[test_mask]
    test_patient_ids_split = sequence_patient_ids[test_mask]
     
    # Train SVM
    model = Pipeline([
        ('scaler', StandardScaler()),
        ('svm', SVC(kernel='rbf', class_weight='balanced', probability=True, random_state=42))
    ])
    
    model.fit(X_train_split, y_train_split)
    
    # Get both predictions and probabilities for test sequences
    test_sequence_preds = model.predict(X_test_split)
    test_sequence_probs = model.predict_proba(X_test_split)
    
    # METHOD 1: Majority Voting
    patient_predictions = {}
    for i, patient_id in enumerate(test_patient_ids_split):
        if patient_id not in patient_predictions:
            patient_predictions[patient_id] = []
        patient_predictions[patient_id].append(test_sequence_preds[i])
    
    majority_preds = []
    true_labels = []
    for patient_id in test_patients:
        votes = patient_predictions[patient_id]
        pred_label = np.argmax(np.bincount(votes))
        majority_preds.append(pred_label)
        true_labels.append(patient_to_label[patient_id])
    
    majority_acc = balanced_accuracy_score(true_labels, majority_preds)
    majority_accuracies.append(majority_acc)
    
    # METHOD 2: Probability Averaging
    patient_probs = {}
    for i, patient_id in enumerate(test_patient_ids_split):
        if patient_id not in patient_probs:
            patient_probs[patient_id] = []
        patient_probs[patient_id].append(test_sequence_probs[i])
    
    probability_preds = []
    for patient_id in test_patients:
        avg_probs = np.mean(patient_probs[patient_id], axis=0)
        pred_label = np.argmax(avg_probs)
        probability_preds.append(pred_label)
    
    probability_acc = balanced_accuracy_score(true_labels, probability_preds)
    probability_accuracies.append(probability_acc)


Cross-validation splits: 100%|██████████| 1000/1000 [00:38<00:00, 25.92split/s]


In [335]:
# Final results comparison
print(f"\n=== COMPARISON RESULTS ===\n")
print(f"MAJORITY VOTING:")
print(f"  Mean Balanced Accuracy: {np.mean(majority_accuracies):.4f} (±{np.std(majority_accuracies):.4f})")
print(f"  Min: {np.min(majority_accuracies):.4f}, Max: {np.max(majority_accuracies):.4f}")

print(f"\nPROBABILITY AVERAGING:")
print(f"  Mean Balanced Accuracy: {np.mean(probability_accuracies):.4f} (±{np.std(probability_accuracies):.4f})")
print(f"  Min: {np.min(probability_accuracies):.4f}, Max: {np.max(probability_accuracies):.4f}")


=== COMPARISON RESULTS ===

MAJORITY VOTING:
  Mean Balanced Accuracy: 0.7225 (±0.2617)
  Min: 0.2500, Max: 1.0000

PROBABILITY AVERAGING:
  Mean Balanced Accuracy: 0.7087 (±0.2650)
  Min: 0.2500, Max: 1.0000
