In [1]:
import numpy as np
import os
import pandas as pd
import pickle

from paradigma.config import ArmActivityFeatureExtractionConfig
from paradigma.gait.feature_extraction import pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \
    extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity
from paradigma.segmenting import create_segments, discard_segments, categorize_segments

from pdathome.constants import global_constants as gc
from pdathome.utils import merge_timestamps_and_predictions

In [6]:
# Quantify arm swing per subject, per wrist side and per medication stage.
#
# Result layout (nested dicts):
#   arm_swing_parameters[subject][affected_side][med_stage][key][segment_length]
# where `key` is 'filtered', 'unfiltered' or 'annotated' (PD subjects only) and
# `segment_length` is a segment category or 'overall'. Each leaf is a dict that
# always holds 'time_s' and, when more than one angle extremum is found, also
# 'range_of_motion', 'forward_pav' and 'backward_pav'.
l_ids = gc.participant_ids.PD_IDS + gc.participant_ids.HC_IDS

# The configuration and the classification threshold depend on neither the
# subject nor the side, so they are set up once instead of once per iteration.
config = ArmActivityFeatureExtractionConfig()

# Value passed as `max_frequency_activity` to extract_angle_extremes; named here
# so it can be tuned in one place.
max_frequency_activity = 1.75

# Load classification threshold
with open(os.path.join(gc.paths.PATH_THRESHOLDS, 'arm_activity', f'{gc.classifiers.ARM_ACTIVITY_CLASSIFIER_SELECTED}.txt'), 'r') as f:
    threshold = float(f.read())

arm_swing_parameters = {}

# enumerate gives the running position directly; list.index() would rescan the
# list on every iteration and report the wrong position for a repeated id.
for subject_nr, subject in enumerate(l_ids, start=1):
    print(f"Processing {subject}... ({subject_nr}/{len(l_ids)})")
    arm_swing_parameters[subject] = {}

    for affected_side in [gc.descriptives.MOST_AFFECTED_SIDE, gc.descriptives.LEAST_AFFECTED_SIDE]:
        arm_swing_parameters[subject][affected_side] = {}

        # load timestamp data
        df_ts = pd.read_parquet(os.path.join(gc.paths.PATH_PREPARED_DATA, f'{subject}_{affected_side}.parquet'))

        # Segment categories are derived from the rows labelled 'Walking' only.
        df_gait = df_ts.loc[df_ts[gc.columns.FREE_LIVING_LABEL] == 'Walking'].copy()

        df_gait[gc.columns.SEGMENT_NR] = create_segments(
            time_array=df_gait[gc.columns.TIME].values,
            max_segment_gap_s=config.max_segment_gap_s,
        )

        df_gait = discard_segments(
            df=df_gait,
            segment_nr_colname=gc.columns.SEGMENT_NR,
            min_segment_length_s=config.min_segment_length_s,
            fs=config.sampling_frequency,
        )

        df_gait[gc.columns.SEGMENT_CAT] = categorize_segments(
            df=df_gait,
            fs=config.sampling_frequency,
        )

        # load arm swing predictions
        df_pred = pd.read_parquet(os.path.join(gc.paths.PATH_ARM_ACTIVITY_PREDICTIONS, gc.classifiers.ARM_ACTIVITY_CLASSIFIER_SELECTED, f'{subject}_{affected_side}.parquet'))

        # merge timestamp data into arm swing predictions (keep only predicted gait timestamps)
        df_ts = merge_timestamps_and_predictions(
            df_ts=df_ts,
            df_pred=df_pred,
            time_colname=gc.columns.TIME,
            pred_proba_colname=gc.columns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA,
            window_length_s=config.window_length_s,
            fs=config.sampling_frequency,
        )

        # Attach the segment category by timestamp; rows without a match in
        # df_gait get NaN because this is a left join.
        df_ts = pd.merge(df_ts, df_gait[[gc.columns.TIME, gc.columns.SEGMENT_CAT]], on=gc.columns.TIME, how='left')

        # Drop rows where any gyroscope column is missing.
        df_ts = df_ts.dropna(subset=gc.columns.GYROSCOPE_COLS).reset_index(drop=True)

        # Binarize the predicted probability with the loaded threshold.
        df_ts[gc.columns.PRED_NO_OTHER_ARM_ACTIVITY] = df_ts[gc.columns.PRED_NO_OTHER_ARM_ACTIVITY_PROBA] >= threshold

        # Subjects outside PD_IDS get one constant stage value, so the loop
        # below runs exactly once for them.
        if subject not in gc.participant_ids.PD_IDS:
            df_ts[gc.columns.PRE_OR_POST] = gc.descriptives.CONTROLS

        for med_stage in df_ts[gc.columns.PRE_OR_POST].unique():
            arm_swing_parameters[subject][affected_side][med_stage] = {}

            df_med_stage = df_ts[df_ts[gc.columns.PRE_OR_POST] == med_stage].copy()

            # Perform principal component analysis on the gyroscope signals to obtain the angular velocity in the
            # direction of the swing of the arm
            df_med_stage[gc.columns.VELOCITY] = pca_transform_gyroscope(
                df=df_med_stage,
                y_gyro_colname=gc.columns.GYROSCOPE_Y,
                z_gyro_colname=gc.columns.GYROSCOPE_Z,
                pred_colname=gc.columns.PRED_NO_OTHER_ARM_ACTIVITY,
            )

            # PER SEGMENT
            time_array = np.array(df_med_stage[gc.columns.TIME])
            df_med_stage[gc.columns.SEGMENT_NR] = create_segments(
                time_array=time_array,
                max_segment_gap_s=config.max_segment_gap_s,
            )

            segmented_data = []
            # sort=False keeps groups in order of first appearance. The
            # concatenated angles are assigned back positionally below, which
            # assumes each segment's rows are contiguous in df_med_stage
            # (TODO confirm against create_segments).
            df_grouped = df_med_stage.groupby(gc.columns.SEGMENT_NR, sort=False)

            for _, group in df_grouped:
                time_array = np.array(group[gc.columns.TIME])
                velocity_array = np.array(group[gc.columns.VELOCITY])

                # Integrate the angular velocity to obtain an estimation of the angle
                angle_array = compute_angle(
                    time_array=time_array,
                    velocity_array=velocity_array,
                )

                # Remove the moving average from the angle to account for possible drift caused by the integration
                # of noise in the angular velocity
                angle_array = remove_moving_average_angle(
                    angle_array=angle_array,
                    fs=config.sampling_frequency,
                )
                if len(angle_array) > 0:  # Skip if no windows are created
                    segmented_data.append(angle_array)

            if len(segmented_data) > 0:
                angle_array = np.concatenate(segmented_data, axis=0)
            else:
                raise ValueError("No windows were created from the given data.")

            df_med_stage[gc.columns.ANGLE] = angle_array

            # 'annotated' is only evaluated for PD subjects.
            datasets = ['predicted', 'not_predicted']
            if subject in gc.participant_ids.PD_IDS:
                datasets.append('annotated')

            for subset in datasets:
                if subset == 'predicted':
                    # Rows the classifier flagged at or above the threshold.
                    df_subset = df_med_stage[df_med_stage[gc.columns.PRED_NO_OTHER_ARM_ACTIVITY]==1]
                    key = 'filtered'
                elif subset == 'annotated':
                    # Rows whose arm label equals the literal below.
                    df_subset = df_med_stage[df_med_stage[gc.columns.ARM_LABEL]=='Gait without other behaviours or other positions']
                    key = 'annotated'
                elif subset == 'not_predicted':
                    # No filtering: every row of this medication stage.
                    df_subset = df_med_stage
                    key = 'unfiltered'

                arm_swing_parameters[subject][affected_side][med_stage][key] = {}

                # Every non-missing segment category present, plus a pooled 'overall' entry.
                segment_cats = [x for x in df_subset[gc.columns.SEGMENT_CAT].unique() if pd.notna(x)] + ['overall']

                for segment_length in segment_cats:
                    if segment_length == 'overall':
                        df_subset_segment_length = df_subset
                    else:
                        df_subset_segment_length = df_subset[df_subset[gc.columns.SEGMENT_CAT] == segment_length]

                    angle_array = np.array(df_subset_segment_length[gc.columns.ANGLE])
                    velocity_array = np.array(df_subset_segment_length[gc.columns.VELOCITY])

                    angle_extrema_indices, minima_indices, maxima_indices = extract_angle_extremes(
                        angle_array=angle_array,
                        sampling_frequency=config.sampling_frequency,
                        max_frequency_activity=max_frequency_activity,
                    )

                    # Number of samples divided by the sampling frequency.
                    feature_dict = {
                        'time_s': len(angle_array) / config.sampling_frequency,
                    }

                    # The remaining features are only added when more than one
                    # extremum was found; otherwise only 'time_s' is stored.
                    if len(angle_extrema_indices) > 1:
                        # Calculate range of motion based on extrema indices
                        feature_dict['range_of_motion'] = compute_range_of_motion(
                            angle_array=angle_array,
                            extrema_indices=list(angle_extrema_indices),
                        )

                        # Compute the forward and backward peak angular velocities
                        feature_dict['forward_pav'], feature_dict['backward_pav'] = compute_peak_angular_velocity(
                            velocity_array=velocity_array,
                            angle_extrema_indices=angle_extrema_indices,
                            minima_indices=minima_indices,
                            maxima_indices=maxima_indices,
                        )

                    arm_swing_parameters[subject][affected_side][med_stage][key][segment_length] = feature_dict

In [5]:
# Output directory for the quantification results. The default is the original
# machine-specific location; set the PDH_QUANTIFICATION_DIR environment variable
# to write elsewhere without editing the notebook.
path_output = os.environ.get(
    'PDH_QUANTIFICATION_DIR',
    r'C:\Users\erik_\Documents\PhD\data\pdh_public\preprocessed_data\5.arm_swing_quantification',
)

# Create the directory if it does not exist yet, so the run does not fail at
# the final write.
os.makedirs(path_output, exist_ok=True)

# Persist the nested results dict built in the previous cell.
with open(os.path.join(path_output, 'quantification.pickle'), 'wb') as f:
    pickle.dump(arm_swing_parameters, f)