# Modules

In [3]:
import datetime
import numpy as np
import os
import pandas as pd
import sys

# If pdathome.constants cannot be imported, add the path to the src folder to the system path.
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

from IPython.display import clear_output

from pdathome.constants import *
from pdathome.preprocessing import arm_label_majority_voting

from paradigma.feature_extraction import extract_temporal_domain_features, extract_spectral_domain_features
from paradigma.imu_preprocessing import butterworth_filter
from paradigma.gait_analysis_config import GaitFeatureExtractionConfig
from paradigma.preprocessing_config import IMUPreprocessingConfig
from paradigma.windowing import tabulate_windows

# Process data

In [6]:
# Build and store the windowed gait feature table for every subject/side
# combination that does not have an exported feature file yet.
for subject in L_PD_IDS + L_HC_IDS:
    for side in [MOST_AFFECTED_SIDE, LEAST_AFFECTED_SIDE]:
        features_path = os.path.join(PATH_GAIT_FEATURES, f'{subject}_{side}.pkl')

        # An existing feature file marks this subject/side as already processed.
        if os.path.exists(features_path):
            continue

        print(f"Time {datetime.datetime.now()} - {subject} {side} - Processing ...")
        df = pd.read_pickle(os.path.join(PATH_DATAFRAMES, f'{subject}_{side}.pkl'))

        # ---- Preprocessing: filter every accelerometer channel ----
        config = IMUPreprocessingConfig()
        config.acceleration_units = 'g'

        for col in config.d_channels_accelerometer.keys():

            # Convert to [g] when the data is in m/s^2. The units are set to
            # 'g' just above, so this only runs if that setting is changed.
            if config.acceleration_units == 'm/s^2':
                df[col] /= 9.81

            # The high-pass output is stored as 'filt_<col>' and the low-pass
            # output as 'grav_<col>'; both are computed from the unfiltered column.
            for prefix, passband in {'filt': 'hp', 'grav': 'lp'}.items():
                df[f'{prefix}_{col}'] = butterworth_filter(
                    single_sensor_col=np.array(df[col]),
                    order=config.filter_order,
                    cutoff_frequency=config.lower_cutoff_frequency,
                    passband=passband,
                    sampling_frequency=config.sampling_frequency,
                )

            # The high-pass filtered signal takes over the original column name.
            df = df.drop(columns=[col]).rename(columns={f'filt_{col}': col})

        # ---- Column bookkeeping for windowing and export ----
        config = GaitFeatureExtractionConfig()
        time_col = config.time_colname
        is_pd_subject = subject in L_PD_IDS

        # Label columns that are only available for some subject groups.
        label_cols = []
        if is_pd_subject:
            label_cols += ['pre_or_post', 'arm_label']
        if subject in L_TREMOR_IDS:
            label_cols += ['tremor_label']

        config.l_data_point_level_cols += [time_col, 'free_living_label'] + label_cols
        l_ts_cols = [time_col, 'window_nr', 'free_living_label'] + label_cols

        l_export_cols = [time_col, 'window_nr', 'activity_label_majority_voting', 'gait_majority_voting']
        l_export_cols += list(config.d_channels_values.keys())
        if is_pd_subject:
            l_export_cols += ['pre_or_post', 'arm_label_majority_voting']

        # ---- Windowing ----
        df_windowed = tabulate_windows(
            df=df,
            time_column_name=time_col,
            data_point_level_cols=config.l_data_point_level_cols,
            window_length_s=config.window_length_s,
            window_step_size_s=config.window_step_size_s,
            sampling_frequency=config.sampling_frequency,
        )

        # Keep the per-window timestamps and raw labels for later use.
        ts_path = os.path.join(PATH_GAIT_FEATURES, f'{subject}_{side}_ts.pkl')
        df_windowed[l_ts_cols].to_pickle(ts_path)

        # ---- Window-level labels ----
        window_labels = df_windowed['free_living_label']

        # Most prevalent activity label within each window.
        df_windowed['activity_label_majority_voting'] = window_labels.apply(
            lambda labels: pd.Series(labels).mode()[0]
        )

        # A window counts as gait when at least half of its samples are 'Walking'.
        df_windowed['gait_majority_voting'] = window_labels.apply(
            lambda labels: 2 * labels.count('Walking') >= len(labels)
        )

        if is_pd_subject:
            # Reduce the per-sample 'pre_or_post' values to the first one of each window.
            df_windowed['pre_or_post'] = df_windowed['pre_or_post'].str[0]
            df_windowed['arm_label_majority_voting'] = df_windowed['arm_label'].apply(
                lambda labels: arm_label_majority_voting(config, labels)
            )

        # Drop the per-sample timestamp/label columns now that they are summarised.
        # NOTE(review): config.time_colname is part of l_ts_cols and is dropped
        # here, yet it is also listed in l_export_cols -- confirm that the
        # feature extraction below provides it again before the export.
        kept_cols = ('window_nr', 'pre_or_post')
        df_windowed = df_windowed.drop(
            columns=[name for name in l_ts_cols if name not in kept_cols]
        )

        # ---- Feature extraction ----
        # Statistics of the temporal domain signals.
        df_windowed = extract_temporal_domain_features(
            config=config,
            df_windowed=df_windowed,
            l_gravity_stats=['mean', 'std'],
        )

        # Spectral domain features of the accelerometer signals.
        df_windowed = extract_spectral_domain_features(
            config=config,
            df_windowed=df_windowed,
            sensor=config.sensor,
            l_sensor_colnames=config.l_accelerometer_cols,
        )

        df_windowed[l_export_cols].to_pickle(features_path)

        # Clear the notebook cell output before the next subject/side.
        clear_output(wait=False)