In [7]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import joblib

# Baseline experiment: load the skeleton features, hold out 30% of the rows,
# and fit scaler -> forest-based feature selection -> forest classifier.

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load "Data/Xtrain1.pkl" when it comes from a trusted source.
train_raw = joblib.load("Data/Xtrain1.pkl")  # presumably a pandas DataFrame - TODO confirm
Y_train = np.load('Data/Ytrain1.npy')
print(train_raw.shape)
print(Y_train.shape)

# Keep the raw table under its own name so that X_train only ever refers to
# the stacked 2-D feature matrix (one row per sample).
patient_ids = train_raw["Patient_Id"].values
X_train = np.vstack(train_raw["Skeleton_Features"].values)

# Fail early, with a readable message, if the inputs are misaligned.
assert len(X_train) == len(Y_train) == len(patient_ids), (
    "features, labels and patient ids must have one entry per sample"
)

# NOTE(review): patient_ids is not used by the split below. If a patient
# contributes more than one row, this row-level random split can place the
# same patient in both train and test and inflate the test score. Consider a
# group-aware splitter (e.g. StratifiedGroupKFold with groups=patient_ids)
# once the rows-per-patient count is confirmed.
X_train_1, X_test, Y_train_1, Y_test = train_test_split(
    X_train, Y_train, test_size=0.3, random_state=42, stratify=Y_train
)

print(f"Train shape: {X_train_1.shape}, Test shape: {X_test.shape}")

# The selector keeps features whose forest importance reaches the median
# importance; the final classifier is trained on the kept features only.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('selector', SelectFromModel(RandomForestClassifier(n_estimators=200, random_state=42),
                                 threshold="median")),
    ('clf', RandomForestClassifier(n_estimators=300, random_state=42))
])

pipe.fit(X_train_1, Y_train_1)

# Macro-averaged F1 on the rows used for fitting and on the held-out rows.
train_f1 = f1_score(Y_train_1, pipe.predict(X_train_1), average='macro')
test_f1 = f1_score(Y_test, pipe.predict(X_test), average='macro')

print(f"\nTrain F1: {train_f1:.3f}")
print(f"Test  F1: {test_f1:.3f}")


(700, 2)
(700,)
Train shape: (490, 132), Test shape: (210, 132)

Train F1: 1.000
Test  F1: 0.988


In [8]:
def add_angle_feature(X, keypoints=(12, 14, 16)):
    """Append the angle formed at a middle keypoint as a new last column.

    Keypoint ``k`` is read from columns ``2*(k-1)`` (x) and ``2*k - 1`` (y)
    of ``X``, so the default triple (12, 14, 16) uses columns 22/23, 26/27
    and 30/31.
    NOTE(review): this assumes the features are interleaved (x, y) pairs per
    keypoint starting at column 0 - confirm against the dataset layout.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix; converted with ``np.asarray``.
    keypoints : sequence of three ints, default (12, 14, 16)
        Keypoint numbers ``(first, vertex, last)``, each >= 1. The angle is
        measured at ``vertex`` between the rays towards ``first`` and
        ``last``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features + 1)
        ``X`` with the unsigned angle in degrees (0 to 180) appended as the
        last column.

    Raises
    ------
    ValueError
        If ``keypoints`` does not hold exactly three values or any value is
        smaller than 1 (which would silently wrap to negative columns).
    IndexError
        If a requested keypoint lies beyond the columns of ``X``.
    """
    X = np.asarray(X)
    first, vertex, last = keypoints
    if min(first, vertex, last) < 1:
        raise ValueError("keypoint numbers must be >= 1")

    def _xy(k):
        # Columns (2k-2, 2k-1) hold the (x, y) pair of keypoint k.
        x_col = 2 * (k - 1)
        return X[:, [x_col, x_col + 1]]

    a, b, c = _xy(first), _xy(vertex), _xy(last)

    # Rays from the vertex towards the two outer keypoints.
    ba = a - b
    bc = c - b
    dot = (ba * bc).sum(axis=1)
    cross = ba[:, 0] * bc[:, 1] - ba[:, 1] * bc[:, 0]
    # atan2(|cross|, dot) gives the unsigned angle and needs no division by
    # the vector norms, so zero-length rays yield 0 instead of NaN.
    angles_deg = np.degrees(np.arctan2(np.abs(cross), dot)).reshape(-1, 1)

    # Append angle as last column
    return np.hstack([X, angles_deg])

# Feature matrix with the joint-angle column appended as the last feature.
X_with_angle = add_angle_feature(X_train)

# Stratified 70/30 hold-out split with a fixed seed.
split_options = dict(test_size=0.3, random_state=42, stratify=Y_train)
X_train_2, X_test, Y_train_2, Y_test = train_test_split(
    X_with_angle, Y_train, **split_options
)

print(f"Train shape: {X_train_2.shape}, Test shape: {X_test.shape}")

# Scaling -> median-importance feature selection -> final forest classifier.
selector_forest = RandomForestClassifier(n_estimators=200, random_state=42)
final_forest = RandomForestClassifier(n_estimators=300, random_state=42)
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('selector', SelectFromModel(selector_forest, threshold="median")),
    ('clf', final_forest),
])
pipe.fit(X_train_2, Y_train_2)

# Macro-averaged F1 on the fitted rows and on the held-out rows.
train_pred = pipe.predict(X_train_2)
test_pred = pipe.predict(X_test)
train_f1 = f1_score(Y_train_2, train_pred, average='macro')
test_f1 = f1_score(Y_test, test_pred, average='macro')

print(f"\nTrain F1: {train_f1:.3f}")
print(f"Test  F1: {test_f1:.3f}")


Train shape: (490, 133), Test shape: (210, 133)

Train F1: 1.000
Test  F1: 0.996
