# Modules

In [2]:
import numpy as np
import os
import pandas as pd
import sys

# if you are unable to load pdathome.constants, you need to add the path to the src folder to the system path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

from pdathome.classification import cv_train_test_model, windows_to_timestamps, store_model
from pdathome.constants import classifiers, columns, participant_ids, paths
from pdathome.load import load_dataframes_directory

from paradigma.gait_analysis_config import ArmSwingFeatureExtractionConfig

# Constants

In [3]:
# Classifier identifier used throughout this notebook: it is passed as `model`
# to cv_train_test_model and store_model, and is part of the importances file name.
classifier = classifiers.RANDOM_FOREST

# Process data

In [12]:
# Initialize configuration
config = ArmSwingFeatureExtractionConfig()

# Define predictors: every channel listed in the configuration is a predictor;
# those without 'dominant' in their name are additionally passed as
# `l_predictors_scale`.
l_predictors = list(config.d_channels_values.keys())
l_predictors_scale = [x for x in l_predictors if 'dominant' not in x]

# Subjects to process. Defined once so that the data that is loaded and the
# subjects that are iterated over cannot drift apart.
# Full cohort: participant_ids.L_PD_IDS + participant_ids.L_HC_IDS
l_subject_ids = ['hbv002', 'hbv012']

# Load data
df_all_subjects = load_dataframes_directory(
    directory_path=paths.PATH_ARM_ACTIVITY_FEATURES,
    l_ids=l_subject_ids
)

# Lists to store results
l_thresholds = []  # NOTE(review): never populated in this cell - confirm whether it is still needed
l_importances = []

# Iterate over subjects and process data
for subject in l_subject_ids:
    print(f"Processing subject {subject}")

    # Train and test model. The full dataframe is passed together with the
    # subject id (presumably the subject is held out inside
    # cv_train_test_model - confirm in pdathome.classification).
    df_test, _, importances = cv_train_test_model(
        subject=subject,
        df=df_all_subjects,
        model=classifier,
        l_predictors=l_predictors,
        l_predictors_scale=l_predictors_scale,
        target_column_name=columns.OTHER_ARM_ACTIVITY_MAJORITY_VOTING,
        pred_proba_colname=columns.PRED_OTHER_ARM_ACTIVITY_PROBA,
        pred_colname=columns.PRED_OTHER_ARM_ACTIVITY,
        step='arm_activity'
    )

    # Collect importances
    l_importances.append(importances)

    # Save predictions.
    # `pred_proba_colname` receives the probability column (the same one
    # requested from cv_train_test_model above), not the hard prediction column.
    # NOTE(review): output goes to PATH_GAIT_PREDICTIONS although
    # step='arm_activity' - confirm this is the intended destination.
    windows_to_timestamps(
        subject=subject, df=df_test,
        path_output=paths.PATH_GAIT_PREDICTIONS,
        pred_proba_colname=columns.PRED_OTHER_ARM_ACTIVITY_PROBA,
        step='arm_activity'
    )
    

# Save importances
# Average each feature's importance over all processed subjects BEFORE opening
# the output file: opening with 'w' truncates it, so if the aggregation fails
# (e.g. pd.concat raises on an empty l_importances) a previously written file
# would otherwise be wiped.
# Each entry of l_importances is presumably a feature -> importance mapping - confirm.
all_importances = pd.concat([pd.Series(imp) for imp in l_importances], axis=1).mean(axis=1)

path_importances = os.path.join(paths.PATH_CLASSIFIERS, f'{classifier}_importances.txt')
with open(path_importances, 'w') as f:
    # One "<feature>: <mean importance>" line per feature
    f.writelines(
        f'{feature}: {importance}\n'
        for feature, importance in all_importances.items()
    )

# Store the model for the 'arm_activity' step, using every loaded subject
# (presumably this also fits and saves the scaler, given `path_scalers` -
# confirm in pdathome.classification).
store_model(
    # what to store
    step='arm_activity',
    model=classifier,
    # data and columns
    df=df_all_subjects,
    target_column_name=columns.OTHER_ARM_ACTIVITY_MAJORITY_VOTING,
    l_predictors=l_predictors,
    l_predictors_scale=l_predictors_scale,
    # output locations
    path_classifiers=paths.PATH_CLASSIFIERS,
    path_scalers=paths.PATH_SCALERS,
)