# Modules

In [1]:
import datetime
import json
import numpy as np
import os
import pandas as pd
import sys

# # if you are unable to load pdathome.constants, you need to add the path to the src folder to the system path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

from IPython.display import clear_output

from pdathome.constants import *
from pdathome.preprocessing import arm_label_majority_voting

from paradigma.constants import DataColumns
from paradigma.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
    signal_to_ffts, extract_angle_extremes, extract_range_of_motion, extract_peak_angular_velocity, \
    get_dominant_frequency, compute_perc_power, extract_temporal_domain_features, extract_spectral_domain_features
from paradigma.gait_analysis_config import ArmSwingFeatureExtractionConfig
from paradigma.imu_preprocessing import butterworth_filter
from paradigma.preprocessing_config import IMUPreprocessingConfig
from paradigma.windowing import tabulate_windows, create_segments, discard_segments

# Constants

In [2]:
# IMU preprocessing configuration. The accelerometer data is declared to be in 'g', so the
# m/s^2 -> g conversion in the processing loop is skipped.
imu_config = IMUPreprocessingConfig()
imu_config.acceleration_units = 'g'

# Arm swing feature extraction configuration, with the column names used for the gait predictions
arm_activity_config = ArmSwingFeatureExtractionConfig()
arm_activity_config.pred_gait_colname = 'pred_gait'
arm_activity_config.pred_gait_proba_colname = 'pred_gait_proba'

# Add the time column to the data-point-level columns that are passed to the windowing step
arm_activity_config.l_data_point_level_cols += [arm_activity_config.time_colname]

In [3]:
# Subject identifiers available for processing
l_ids = ['hbv002', 'hbv012', 'hbv053']

# Columns holding one value per data point.
# NOTE(review): this name is reassigned inside the processing loop before it is read, so this
# definition appears to be unused. It also uses a '_grav' suffix, whereas the filtering step in
# the loop creates gravity columns with a 'grav_' prefix - confirm before relying on this list.
l_data_point_level_cols = [
    arm_activity_config.time_colname,
    'pre_or_post',
    'gait_boolean',
    arm_activity_config.angle_colname,
    arm_activity_config.velocity_colname,
    *arm_activity_config.l_gyroscope_cols,
    *arm_activity_config.l_accelerometer_cols,
    *[f'{accel_col}_grav' for accel_col in arm_activity_config.l_accelerometer_cols],
]

# Process data

In [4]:
# Read the gait probability threshold, stored as a single number in a text file
path_gait_threshold = os.path.join(PATH_THRESHOLDS, 'gait_thresholds.txt')
with open(path_gait_threshold, 'r') as f:
    threshold = float(f.read())

In [7]:
# Arm activity feature extraction: for every subject and side, combine the gait predictions with
# the sensor data, derive the arm swing angle, window the predicted gait and extract window-level
# features. Two files are written per subject and side: '<subject>_<side>_ts.pkl' (windows with
# timestamps) and '<subject>_<side>.pkl' (window-level features).

# Load clinical distribution of participants (does not depend on the subject, so it is read once)
with open(os.path.join(PATH_CLINICAL_DATA, 'distribution_participants.json'), 'r') as f:
    d_participant_distribution = json.load(f)

# NOTE(review): only 'hbv002' is processed here; iterate over l_ids to process all subjects
for subject in ['hbv002']: #l_ids:
    # Load gait predictions
    df_pred = pd.read_pickle(os.path.join(PATH_GAIT_PREDICTIONS, f'{subject}_ts.pkl'))

    # Configure files and columns based on cohort; the label columns are only used for PD subjects
    is_pd = subject in L_PD_IDS
    if is_pd:
        file_sensor_data = 'phys_cur_PD_merged.mat'
        path_annotations = PATH_ANNOTATIONS_PD
        l_cohort_label_cols = ['pre_or_post', 'arm_label']
        l_cols_to_export = [arm_activity_config.time_colname, arm_activity_config.segment_nr_colname, 'window_nr', 'arm_label', 'pre_or_post']
    else:
        file_sensor_data = 'phys_cur_HC_merged.mat'
        path_annotations = PATH_ANNOTATIONS_CONTROLS
        l_cohort_label_cols = []
        l_cols_to_export = [arm_activity_config.time_colname, arm_activity_config.segment_nr_colname, 'window_nr']

    for side in [MOST_AFFECTED_SIDE, LEAST_AFFECTED_SIDE]:
        print(f"Time {datetime.datetime.now()} - {subject} {side} - Processing ...")

        # Load sensor data and select the predictions of the current side
        df_sensors = pd.read_pickle(os.path.join(PATH_DATAFRAMES, f'{subject}_{side}.pkl'))
        df_pred_side = df_pred.loc[df_pred['side']==side].copy()

        for col in imu_config.d_channels_accelerometer.keys():
            # Change to correct units [g]
            if imu_config.acceleration_units == 'm/s^2':
                df_sensors[col] /= 9.81

            # Filter each accelerometer channel twice at the same cutoff frequency:
            # high-pass ('filt') and low-pass ('grav')
            for result, side_pass in zip(['filt', 'grav'], ['hp', 'lp']):
                df_sensors[f'{result}_{col}'] = butterworth_filter(
                    single_sensor_col=np.array(df_sensors[col]),
                    order=imu_config.filter_order,
                    cutoff_frequency=imu_config.lower_cutoff_frequency,
                    passband=side_pass,
                    sampling_frequency=imu_config.sampling_frequency,
                )

            # Replace the raw channel by its high-pass filtered version; 'grav_<col>' is kept alongside
            df_sensors = df_sensors.drop(columns=[col]).rename(columns={f'filt_{col}': col})

        # Merge sensor data with predictions
        l_merge_cols = [arm_activity_config.time_colname, 'free_living_label'] + l_cohort_label_cols

        df = pd.merge(left=df_pred_side, right=df_sensors, how='left', on=l_merge_cols).reset_index(drop=True)

        # Process free living label and remove rows without gyroscope data
        df['gait_boolean'] = (df['free_living_label'] == 'Walking').astype(int)
        df = df.dropna(subset=arm_activity_config.l_gyroscope_cols)

        # Apply threshold to the predicted gait probability (1 = predicted gait, 0 = no gait)
        df[arm_activity_config.pred_gait_colname] = (df[arm_activity_config.pred_gait_proba_colname] >= threshold).astype(int)

        # Perform principal component analysis on the gyroscope signals to obtain the angular velocity in the
        # direction of the swing of the arm
        df[arm_activity_config.velocity_colname] = pca_transform_gyroscope(
            df=df,
            y_gyro_colname=DataColumns.GYROSCOPE_Y,
            z_gyro_colname=DataColumns.GYROSCOPE_Z,
            pred_gait_colname=arm_activity_config.pred_gait_colname
        )

        # Integrate the angular velocity to obtain an estimation of the angle
        df[arm_activity_config.angle_colname] = compute_angle(
            velocity_col=df[arm_activity_config.velocity_colname],
            time_col=df[arm_activity_config.time_colname]
        )

        # Remove the moving average from the angle to account for possible drift caused by the integration
        # of noise in the angular velocity
        df[arm_activity_config.angle_smooth_colname] = remove_moving_average_angle(
            angle_col=df[arm_activity_config.angle_colname],
            sampling_frequency=arm_activity_config.sampling_frequency
        )

        # Filter unobserved data
        if is_pd:
            df = df[df['arm_label'] != 'cant assess']

        # Use only predicted gait for the subsequent steps
        df = df[df[arm_activity_config.pred_gait_colname] == 1].reset_index(drop=True)

        # Group consecutive timestamps into segments with new segments starting after a pre-specified gap
        df = create_segments(
            df=df,
            time_colname=arm_activity_config.time_colname,
            segment_nr_colname=arm_activity_config.segment_nr_colname,
            minimum_gap_s=arm_activity_config.window_length_s
        )

        # Remove any segments that do not adhere to predetermined criteria
        df = discard_segments(
            df=df,
            time_colname=arm_activity_config.time_colname,
            segment_nr_colname=arm_activity_config.segment_nr_colname,
            minimum_segment_length_s=arm_activity_config.window_length_s
        )

        # Create windows of fixed length and step size from the time series, one segment at a time
        l_data_point_level_cols = arm_activity_config.l_data_point_level_cols + l_cohort_label_cols

        l_dfs = [
            tabulate_windows(
                df=df[df[arm_activity_config.segment_nr_colname] == segment_nr].reset_index(drop=True),
                time_column_name=arm_activity_config.time_colname,
                data_point_level_cols=l_data_point_level_cols,
                segment_nr_colname=arm_activity_config.segment_nr_colname,
                window_length_s=arm_activity_config.window_length_s,
                window_step_size_s=arm_activity_config.window_step_size_s,
                segment_nr=segment_nr,
                sampling_frequency=arm_activity_config.sampling_frequency
            )
            for segment_nr in df[arm_activity_config.segment_nr_colname].unique()
        ]
        df_windowed = pd.concat(l_dfs).reset_index(drop=True)

        # Save windows with timestamps for later use
        df_windowed[l_cols_to_export].to_pickle(os.path.join(PATH_ARM_ACTIVITY_FEATURES, f'{subject}_{side}_ts.pkl'))

        df_windowed = df_windowed.drop(columns=[arm_activity_config.time_colname])

        # Majority voting for labels per window
        if is_pd:
            df_windowed['other_arm_activity_majority_voting'] = df_windowed['arm_label'].apply(lambda x: x.count('Gait without other behaviours or other positions') < len(x)/2)
            df_windowed['arm_label_majority_voting'] = df_windowed['arm_label'].apply(lambda x: arm_label_majority_voting(arm_activity_config, x))
            df_windowed = df_windowed.drop(columns=['arm_label'])

        # Transform the angle from the temporal domain to the spectral domain using the fast fourier transform
        df_windowed['angle_freqs'], df_windowed['angle_fft'] = signal_to_ffts(
            sensor_col=df_windowed[arm_activity_config.angle_smooth_colname],
            window_type=arm_activity_config.window_type,
            sampling_frequency=arm_activity_config.sampling_frequency
        )

        # Obtain the dominant frequency of the angle signal in the frequency band of interest
        # defined by the highest peak in the power spectrum
        df_windowed['angle_dominant_frequency'] = df_windowed.apply(
            lambda x: get_dominant_frequency(
                signal_ffts=x['angle_fft'],
                signal_freqs=x['angle_freqs'],
                fmin=arm_activity_config.power_band_low_frequency,
                fmax=arm_activity_config.power_band_high_frequency
                ),
             axis=1
        )

        df_windowed = df_windowed.drop(columns=['angle_fft', 'angle_freqs'])

        # Compute the percentage of power in the frequency band of interest (i.e., the frequency band of the arm swing)
        df_windowed['angle_perc_power'] = df_windowed[arm_activity_config.angle_smooth_colname].apply(
            lambda x: compute_perc_power(
                sensor_col=x,
                fmin_band=arm_activity_config.power_band_low_frequency,
                fmax_band=arm_activity_config.power_band_high_frequency,
                fmin_total=arm_activity_config.spectrum_low_frequency,
                fmax_total=arm_activity_config.spectrum_high_frequency,
                sampling_frequency=arm_activity_config.sampling_frequency,
                window_type=arm_activity_config.window_type
            )
        )

        # Determine the extrema (minima and maxima) of the angle signal
        # (the return value is not used; the columns read below are expected to be added to df_windowed)
        extract_angle_extremes(
            df=df_windowed,
            angle_colname=arm_activity_config.angle_smooth_colname,
            dominant_frequency_colname='angle_dominant_frequency',
            sampling_frequency=arm_activity_config.sampling_frequency
        )

        # Calculate the change in angle between consecutive extrema (minima and maxima) of the angle signal inside the window
        df_windowed['angle_amplitudes'] = extract_range_of_motion(angle_extrema_values_col=df_windowed['angle_extrema_values'])

        # Aggregate the changes in angle between consecutive extrema to obtain the range of motion
        # (windows without amplitudes get a range of motion of 0)
        df_windowed['range_of_motion'] = df_windowed['angle_amplitudes'].apply(lambda x: np.mean(x) if len(x) > 0 else 0).replace(np.nan, 0)
        df_windowed = df_windowed.drop(columns=['angle_amplitudes'])

        # Compute the forward and backward peak angular velocity using the extrema of the angular velocity
        extract_peak_angular_velocity(
            df=df_windowed,
            velocity_colname=arm_activity_config.velocity_colname,
            angle_minima_colname='angle_minima',
            angle_maxima_colname='angle_maxima'
        )

        # Compute aggregated measures of the peak angular velocity (0 for windows without peaks)
        for direction in ['forward', 'backward']:
            df_windowed[f'{direction}_peak_ang_vel_mean'] = df_windowed[f'{direction}_peak_ang_vel'].apply(lambda x: np.mean(x) if len(x) > 0 else 0)
            df_windowed[f'{direction}_peak_ang_vel_std'] = df_windowed[f'{direction}_peak_ang_vel'].apply(lambda x: np.std(x) if len(x) > 0 else 0)
            df_windowed = df_windowed.drop(columns=[f'{direction}_peak_ang_vel'])

        # Compute statistics of the temporal domain accelerometer signals
        df_windowed = extract_temporal_domain_features(arm_activity_config, df_windowed, l_gravity_stats=['mean', 'std'])

        # Transform the accelerometer and gyroscope signals from the temporal domain to the spectral domain
        # using the fast fourier transform and extract spectral features
        for sensor, l_sensor_colnames in zip(['accelerometer', 'gyroscope'], [arm_activity_config.l_accelerometer_cols, arm_activity_config.l_gyroscope_cols]):
            df_windowed = extract_spectral_domain_features(arm_activity_config, df_windowed, sensor, l_sensor_colnames)

        df_windowed = df_windowed.fillna(0)
        df_windowed['side'] = side

        # Export the window identifiers, window-level labels and extracted features. The time column
        # was dropped above and the majority voting labels are only created for PD subjects, so these
        # optional columns are exported only when present; selecting a missing column raises a KeyError.
        l_optional_export_cols = [arm_activity_config.time_colname, 'other_arm_activity_majority_voting', 'arm_label_majority_voting']
        l_requested_export_cols = [arm_activity_config.time_colname, arm_activity_config.segment_nr_colname, 'window_nr', 'other_arm_activity_majority_voting', 'arm_label_majority_voting'] + list(arm_activity_config.d_channels_values.keys())
        l_export_cols = [
            export_col for export_col in l_requested_export_cols
            if export_col not in l_optional_export_cols or export_col in df_windowed.columns
        ]

        df_windowed[l_export_cols].to_pickle(os.path.join(PATH_ARM_ACTIVITY_FEATURES, f'{subject}_{side}.pkl'))

        # clear_output(wait=False)

Time 2024-09-18 11:13:41.574145 - hbv002 MAS - Processing ...
Time 2024-09-18 11:13:56.223487 - hbv002 LAS - Processing ...
