In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Window size used to pick the feature file (presumably milliseconds, going by the
# name); None selects the un-windowed "features.csv".
WINDOW_SIZE_MS = None

# Modality switches read by return_feature_columns: set a flag to True to drop
# that modality for every sensor (e.g. to keep only the IMU signals).
exclude_quat = False   # columns whose name contains "Quat"
exclude_acc = False    # columns whose name contains "Acc"
exclude_gyro = False   # columns whose name contains "Gyr"
exclude_mag = False    # columns whose name contains "Mag"

# Sensors whose columns are kept; remove entries to ignore a sensor.
sensors_to_consider = [
    "arm_l",
    "arm_r",
    "wrist_l",
    "wrist_r",
    "trunk",
]

# Feature families to keep; remove entries to ignore specific features.
time_features = [
    "MAX", "MIN", "AMP", "MEAN",
    "JERK", "RMS", "COR", "STD",
]
frequency_features = [
    "DOMFREQ", "DOMPOW", "TOTPOW",
    "SPEC_CENT", "SPEC_SPREAD",
]

# Set to True to apply PCA (the alternative is selecting the features manually above).
apply_pca = False


In [3]:
# Note: if we consider everything we end up with
# 3 (acc, gyro, mag) * 3 (axis) * 5 (sensors) * (8 (time features) + 5 (frequency_features)) + 4 (quat) * 5 * 13 --> 845 features!
# which is definitely too many for our problem

In [4]:
def return_feature_columns(df, sensors_to_consider, time_features:list, frequency_features:list, exclude_modalities=None):
    """Select the feature columns of ``df`` that match the requested sensors and features.

    A column is selected when its name contains, as literal substrings, a
    modality token ("Quat", "Acc", "Gyr" or "Mag"), a sensor name and a
    feature name.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column names are searched; the data itself is not read.
    sensors_to_consider : iterable of str
        Sensor name tokens to look for in the column names.
    time_features : list of str or None
        Time-domain feature tokens; ``None`` skips them.
    frequency_features : list of str or None
        Frequency-domain feature tokens; ``None`` skips them.
    exclude_modalities : iterable of str or None, optional
        Modality tokens to leave out (any of "Quat", "Acc", "Gyr", "Mag").
        When ``None`` (default) the notebook-level flags ``exclude_quat``,
        ``exclude_acc``, ``exclude_gyro`` and ``exclude_mag`` decide, which
        keeps existing calls working unchanged.

    Returns
    -------
    list of str
        Matching column names without duplicates, ordered by sensor, then by
        feature (time features before frequency features), then by modality
        (Quat, Acc, Gyr, Mag), then by position in ``df.columns``.
    """
    if exclude_modalities is None:
        # Backward-compatible default: fall back to the notebook configuration flags.
        notebook_flags = {
            "Quat": exclude_quat,
            "Acc": exclude_acc,
            "Gyr": exclude_gyro,
            "Mag": exclude_mag,
        }
        exclude_modalities = [name for name, skip in notebook_flags.items() if skip]
    elif isinstance(exclude_modalities, str):
        # A bare string would otherwise be split into single characters by set().
        exclude_modalities = [exclude_modalities]

    excluded = set(exclude_modalities)
    modalities = [name for name in ("Quat", "Acc", "Gyr", "Mag") if name not in excluded]

    columns = df.columns

    def _contains(token):
        # regex=False: tokens are plain substrings, not regular expressions.
        return columns.str.contains(token, regex=False)

    # These masks do not depend on the sensor or the feature, so build them once.
    modality_masks = {name: _contains(name) for name in modalities}

    selected = []
    seen = set()
    for sensor in sensors_to_consider:
        sensor_mask = _contains(sensor)
        for feature_group in (time_features, frequency_features):
            if feature_group is None:
                continue
            for feature in feature_group:
                sensor_feature_mask = sensor_mask & _contains(feature)
                for name in modalities:
                    for column in columns[modality_masks[name] & sensor_feature_mask]:
                        # A column matching several feature tokens must be kept only
                        # once, otherwise df[feat_cols] would duplicate that feature.
                        if column not in seen:
                            seen.add(column)
                            selected.append(column)
    return selected

In [5]:
# Folders holding the pre-computed feature tables and the model-selection results.
save_features_path = r"C:\Users\giusy\OneDrive\Desktop\AI_Healtcare\imu_compensatory_movements\Data\Features"
save_models_path = r"C:\Users\giusy\OneDrive\Desktop\AI_Healtcare\imu_compensatory_movements\Data\Models"
if WINDOW_SIZE_MS is not None:
    # Windowed features: both file names carry the window size.
    df_features = pd.read_csv(save_features_path+f"/features_win_{WINDOW_SIZE_MS}.csv")
    # The f-prefix is required here; without it the literal text "{WINDOW_SIZE_MS}"
    # ends up in the results file name.
    results_save_path = save_models_path+ f"/loso_hyperparam_window_{WINDOW_SIZE_MS}.csv"
else:
    # No window size configured: use the un-windowed feature table.
    df_features = pd.read_csv(save_features_path+"/features.csv")
    results_save_path = save_models_path+ "/loso_hyperparam.csv"

In [6]:

# Encode each condition as an integer class label (this is label encoding, not one-hot).
# Rows whose condition is not listed below keep the sentinel value -1.
label_by_condition = {
    'natural': 0,
    'elbow_brace': 1,
    'elbow_wrist_brace': 2,
}

df_features.loc[:,'Label'] = -1
for condition_name, label_value in label_by_condition.items():
    df_features.loc[df_features['condition'] == condition_name, 'Label'] = label_value

In [7]:
# Sanity check: a remaining -1 would mean some 'condition' value was not mapped to a label.
df_features['Label'].unique()

array([0, 1, 2])

In [8]:
# Distinct task names present in the feature table.
tasks = df_features["task"].unique()
print("Tasks:", tasks)

# Resolve the feature columns matching the configured sensors and feature families.
feat_cols = return_feature_columns(
    df_features,
    sensors_to_consider=sensors_to_consider,
    time_features=time_features,
    frequency_features=frequency_features,
)

Tasks: ['cup-placing' 'peg' 'wiping' 'pouring']


In [9]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [10]:
# Outer loop: leave one subject out (LOSO)
subject_ids = ['P02', 'P03','P04', 'P05', 'P06']

for test_subject in subject_ids:
    # All rows of the held-out subject form the test pool; the rest is for training.
    df_subj_train = df_features[df_features["subject"]!=test_subject]
    df_subj_test = df_features[df_features["subject"]==test_subject]

    # Inner loop: one train/test split per task
    for task in tasks:
        df_train = df_subj_train[df_subj_train["task"] == task]
        df_test = df_subj_test[df_subj_test["task"] == task]

        # StandardScaler cannot fit/transform zero rows, so skip incomplete splits.
        if df_train.empty or df_test.empty:
            print(f"Skipping task {task} for subject {test_subject}: empty train or test split")
            continue

        X_train = df_train[feat_cols]
        Y_train = df_train['Label']

        # The test set must come from the held-out subject (df_test), not from df_train,
        # otherwise the model would be evaluated on its own training data.
        X_test = df_test[feat_cols]
        Y_test = df_test['Label']


        # Initialize the scaler
        scl = StandardScaler()
        # Fit the scaler on the training data only, then apply it to both sets
        scl.fit(X_train)
        X_train_scaled = scl.transform(X_train)
        X_test_scaled = scl.transform(X_test)


        ## Optional : apply PCA --> to add


        # Initialize the model

        print("Done with task:", task)

        


Done with task: cup-placing
Done with task: peg
Done with task: wiping
Done with task: pouring
Done with task: cup-placing
Done with task: peg
Done with task: wiping
Done with task: pouring
Done with task: cup-placing
Done with task: peg
Done with task: wiping
Done with task: pouring
Done with task: cup-placing
Done with task: peg
Done with task: wiping
Done with task: pouring
Done with task: cup-placing
Done with task: peg
Done with task: wiping
Done with task: pouring


In [11]:
import sys

# Put the project folder on the import path (presumably so that the local
# `utils` package imported in the next cell can be found).
# Guarded so that re-running this cell does not append the same entry again.
project_root = r"C:\Users\giusy\OneDrive\Desktop\AI_Healtcare\imu_compensatory_movements"
if project_root not in sys.path:
    sys.path.append(project_root)

In [12]:
from utils.ml import *

In [13]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import f1_score  # or accuracy_score, etc.
from sklearn.base import clone
import numpy as np


# Held-out subjects for leave-one-subject-out (LOSO) cross-validation.
subject_ids = ['P02', 'P03', 'P04', 'P05', 'P06']   # or sorted(df_features["subject"].unique())
# tasks already defined somewhere, e.g.:
# tasks = sorted(df_features["task"].unique())

# NOTE(review): `models` is not defined in this notebook. It is presumably meant to
# come from `utils.ml` as a dict of name -> (estimator, param_grid) - confirm.
for model_name, (estimator, param_grid) in models.items():
    print(f"\n=== Model: {model_name} ===")
    # Only the XGBoost entry is searched; every other model is skipped.
    if model_name == 'XGBoost':
        best_params = None
        best_score = -np.inf

        # -------------------------------------------------------
        # OUTER LOOP: iterate over all hyperparameter combinations
        # -------------------------------------------------------
        for params in ParameterGrid(param_grid):
            print(f"\nTesting params: {params}")
            scores = []

            # ---------------------------------------------------
            # INNER LOOP: LOSO over subjects (and tasks inside)
            # ---------------------------------------------------
            for test_subject in subject_ids:
                df_subj_train = df_features[df_features["subject"] != test_subject]
                df_subj_test  = df_features[df_features["subject"] == test_subject]

                for task in tasks:
                    df_train = df_subj_train[df_subj_train["task"] == task]
                    df_test  = df_subj_test[df_subj_test["task"] == task]

                    # A subject/task pair without rows cannot be scaled or scored;
                    # skip it so the search continues instead of crashing.
                    if df_train.empty or df_test.empty:
                        continue

                    X_train = df_train[feat_cols].values
                    y_train = df_train['Label'].values

                    # IMPORTANT: use df_test here, not df_train
                    X_test = df_test[feat_cols].values
                    y_test = df_test['Label'].values

                    # Standardize (fit only on train)
                    scl = StandardScaler()
                    X_train_scaled = scl.fit_transform(X_train)
                    X_test_scaled  = scl.transform(X_test)

                    # Optional PCA
                    if apply_pca:
                        print("Applying PCA (TODO)")
                        # add PCA here if you want

                    # Clone base estimator and set current params
                    clf = clone(estimator).set_params(**params)
                    clf.fit(X_train_scaled, y_train)

                    y_pred = clf.predict(X_test_scaled)

                    # Example metric: macro F1 across classes
                    score = f1_score(y_test, y_pred, average="macro")
                    scores.append(score)

            if not scores:
                print("No valid splits for these params (maybe empty tasks), skipping")
                continue

            mean_score = np.mean(scores)
            print(f"Mean LOSO-CV score for {params}: {mean_score:.4f}")

            # Keep the best combination seen so far; it was initialised above
            # but never updated before.
            if mean_score > best_score:
                best_score = mean_score
                best_params = params

        if best_params is not None:
            print(f"\nBest params for {model_name}: {best_params} (mean LOSO-CV score: {best_score:.4f})")
        else:
            print(f"\nNo parameter combination could be evaluated for {model_name}")

    


NameError: name 'models' is not defined

# Analyze results

In [None]:
# Load the saved LOSO hyper-parameter search results.
# NOTE(review): this hardcoded path looks like the same file as `results_save_path`
# when WINDOW_SIZE_MS is None - verify that the two stay in sync.
results = pd.read_csv(r"C:\Users\giusy\OneDrive\Desktop\AI_Healtcare\imu_compensatory_movements\Data\Models\loso_hyperparam.csv")

In [None]:
# Display the loaded table; in the recorded output each row holds the metrics of one
# model / parameter set / test subject / task combination.
results

Unnamed: 0,model_name,estimator_type,params,test_subject,task,f1_macro,accuracy,precision_macro,recall_macro,features
0,XGBoost,XGBClassifier,"{'learning_rate': 0.1, 'max_depth': 3, 'n_esti...",P02,cup-placing,0.555556,0.666667,0.5,0.666667,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
1,XGBoost,XGBClassifier,"{'learning_rate': 0.1, 'max_depth': 3, 'n_esti...",P02,peg,0.555556,0.666667,0.5,0.666667,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
2,XGBoost,XGBClassifier,"{'learning_rate': 0.1, 'max_depth': 3, 'n_esti...",P02,wiping,0.555556,0.666667,0.5,0.666667,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
3,XGBoost,XGBClassifier,"{'learning_rate': 0.1, 'max_depth': 3, 'n_esti...",P02,pouring,0.555556,0.666667,0.5,0.666667,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
4,XGBoost,XGBClassifier,"{'learning_rate': 0.1, 'max_depth': 3, 'n_esti...",P03,cup-placing,1.000000,1.000000,1.0,1.000000,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
...,...,...,...,...,...,...,...,...,...,...
335,RF,RandomForestClassifier,"{'max_depth': 20, 'n_estimators': 200}",P05,pouring,1.000000,1.000000,1.0,1.000000,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
336,RF,RandomForestClassifier,"{'max_depth': 20, 'n_estimators': 200}",P06,cup-placing,0.555556,0.666667,0.5,0.666667,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
337,RF,RandomForestClassifier,"{'max_depth': 20, 'n_estimators': 200}",P06,peg,0.555556,0.666667,0.5,0.666667,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."
338,RF,RandomForestClassifier,"{'max_depth': 20, 'n_estimators': 200}",P06,wiping,1.000000,1.000000,1.0,1.000000,"['MAX', 'MIN', 'AMP', 'MEAN', 'JERK', 'RMS', '..."


In [None]:
# Distinct accuracy values over all rows. The recorded output only shows 1/3, 2/3 and 1,
# which would be consistent with three test samples per split - TODO confirm.
results["accuracy"].unique()

array([0.66666667, 1.        , 0.33333333])