# Modules

In [8]:
import datetime
import numpy as np
import os
import pandas as pd
import sys

# If pdathome.constants cannot be imported, add the path to the src folder to the system path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

from IPython.display import clear_output

from pdathome.constants import columns, descriptives, participant_ids, paths
from pdathome.preprocessing import arm_label_majority_voting

from paradigma.feature_extraction import extract_temporal_domain_features, extract_spectral_domain_features
from paradigma.imu_preprocessing import butterworth_filter
from paradigma.gait_analysis_config import GaitFeatureExtractionConfig
from paradigma.preprocessing_config import IMUPreprocessingConfig
from paradigma.windowing import tabulate_windows

# Process data

In [10]:
# Extract windowed gait features for every participant (PD and HC) and both sides.
# For each (subject, side) pair the loop:
#   1. loads the dataframe pickle from paths.PATH_DATAFRAMES,
#   2. filters the accelerometer channels with a Butterworth filter using the
#      'hp' passband (stored under the original column names) and the 'lp'
#      passband (stored under 'grav_<col>'),
#   3. tabulates the data into windows and stores the per-window timestamp and
#      label columns as '<subject>_<side>_ts.pkl',
#   4. derives majority-voting labels per window,
#   5. extracts temporal and spectral domain features,
#   6. stores the export columns as '<subject>_<side>.pkl' in paths.PATH_GAIT_FEATURES.
for subject in participant_ids.L_PD_IDS + participant_ids.L_HC_IDS:
    for side in [descriptives.MOST_AFFECTED_SIDE, descriptives.LEAST_AFFECTED_SIDE]:
        # Uncomment to skip subject/side combinations that were already processed.
        # if os.path.exists(os.path.join(paths.PATH_GAIT_FEATURES, f'{subject}_{side}.pkl')):
        #     continue

        print(f"Time {datetime.datetime.now()} - {subject} {side} - Processing ...")
        df = pd.read_pickle(os.path.join(paths.PATH_DATAFRAMES, f'{subject}_{side}.pkl'))

        # ---- Preprocessing: filter the accelerometer signals ----
        imu_config = IMUPreprocessingConfig()
        imu_config.acceleration_units = 'g'

        # Names of the accelerometer channels
        accel_cols = list(imu_config.d_channels_accelerometer.keys())

        # Convert to [g] only when the configured units are m/s^2; with the
        # units set to 'g' above this branch is not taken.
        if imu_config.acceleration_units == 'm/s^2':
            df[accel_cols] = df[accel_cols] / 9.81

        # Accelerometer data as a 2D array (rows: samples, columns: channels)
        accel_data = df[accel_cols].values

        # Apply the Butterworth filter once per passband; both passbands use
        # the same cutoff frequency (imu_config.lower_cutoff_frequency).
        filtered_data = {
            result: butterworth_filter(
                sensor_data=accel_data,
                order=imu_config.filter_order,
                cutoff_frequency=imu_config.lower_cutoff_frequency,
                passband=passband,
                sampling_frequency=imu_config.sampling_frequency,
            )
            for result, passband in zip(['filt', 'grav'], ['hp', 'lp'])
        }

        # Map each output column name to its filtered signal. The 'filt_'
        # prefix is stripped so the 'hp' result takes over the original
        # accelerometer column names, while the 'lp' result keeps 'grav_'.
        filtered_columns = {
            f'{result}_{col}'.replace('filt_', ''): data[:, i]
            for i, col in enumerate(accel_cols)
            for result, data in filtered_data.items()
        }

        # Replace the raw accelerometer columns by the filtered ones. The
        # arrays are assigned positionally, so rows stay aligned with df even
        # when df does not carry a default 0..n-1 index (an index-based join
        # against freshly created Series would misalign in that case).
        df = df.drop(columns=accel_cols).assign(**filtered_columns)

        # ---- Windowing ----
        gait_config = GaitFeatureExtractionConfig()

        # Columns kept at data point level within each window
        extra_data_point_cols = [gait_config.time_colname, columns.FREE_LIVING_LABEL]
        # Columns stored in the separate timestamp file
        l_ts_cols = [gait_config.time_colname, columns.WINDOW_NR, columns.FREE_LIVING_LABEL]
        # Columns stored in the final feature file
        l_export_cols = [
            gait_config.time_colname,
            columns.WINDOW_NR,
            columns.ACTIVITY_LABEL_MAJORITY_VOTING,
            columns.GAIT_MAJORITY_VOTING,
        ] + list(gait_config.d_channels_values.keys())

        if subject in participant_ids.L_PD_IDS:
            extra_data_point_cols += [columns.PRE_OR_POST, columns.ARM_LABEL]
            l_ts_cols += [columns.PRE_OR_POST, columns.ARM_LABEL]
            l_export_cols += [columns.PRE_OR_POST, columns.ARM_LABEL_MAJORITY_VOTING]
        if subject in participant_ids.L_TREMOR_IDS:
            extra_data_point_cols += [columns.TREMOR_LABEL]
            l_ts_cols += [columns.TREMOR_LABEL]

        gait_config.l_data_point_level_cols += extra_data_point_cols

        df_windowed = tabulate_windows(
            df=df,
            time_column_name=gait_config.time_colname,
            data_point_level_cols=gait_config.l_data_point_level_cols,
            window_length_s=gait_config.window_length_s,
            window_step_size_s=gait_config.window_step_size_s,
            sampling_frequency=gait_config.sampling_frequency,
        )

        # Store windows with timestamps for later use
        df_windowed[l_ts_cols].to_pickle(os.path.join(paths.PATH_GAIT_FEATURES, f'{subject}_{side}_ts.pkl'))

        # ---- Window-level labels ----
        # Most prevalent activity label within the window
        df_windowed[columns.ACTIVITY_LABEL_MAJORITY_VOTING] = df_windowed[columns.FREE_LIVING_LABEL].apply(
            lambda labels: pd.Series(labels).mode()[0]
        )

        # True when at least half of the data points in the window are 'Walking'
        df_windowed[columns.GAIT_MAJORITY_VOTING] = df_windowed[columns.FREE_LIVING_LABEL].apply(
            lambda labels: labels.count('Walking') >= len(labels) / 2
        )

        if subject in participant_ids.L_PD_IDS:
            # Reduce the per-data-point values to the first value of the window
            df_windowed[columns.PRE_OR_POST] = df_windowed[columns.PRE_OR_POST].str[0]
            # Store the arm label vote in its own column: it is part of
            # l_export_cols and must not overwrite the activity label vote.
            df_windowed[columns.ARM_LABEL_MAJORITY_VOTING] = df_windowed[columns.ARM_LABEL].apply(
                lambda labels: arm_label_majority_voting(gait_config, labels)
            )

        # Drop the data point level columns that were saved in the timestamp file.
        # NOTE(review): this also drops the time column, which is listed in
        # l_export_cols; confirm that the feature extraction below provides it
        # again, otherwise the export selection will raise a KeyError.
        df_windowed = df_windowed.drop(
            columns=[x for x in l_ts_cols if x not in [columns.WINDOW_NR, columns.PRE_OR_POST]]
        )

        # ---- Feature extraction ----
        # Compute statistics of the temporal domain signals
        df_windowed = extract_temporal_domain_features(
            config=gait_config,
            df_windowed=df_windowed,
            l_gravity_stats=['mean', 'std'],
        )

        # Transform the signals from the temporal domain to the spectral domain
        # and extract spectral features
        df_windowed = extract_spectral_domain_features(
            config=gait_config,
            df_windowed=df_windowed,
            sensor=gait_config.sensor,
            l_sensor_colnames=gait_config.l_accelerometer_cols,
        )

        df_windowed[l_export_cols].to_pickle(os.path.join(paths.PATH_GAIT_FEATURES, f'{subject}_{side}.pkl'))

        # Clear the notebook cell output before the next subject/side
        clear_output(wait=False)