# Modules

In [1]:
import datetime
import functools
import json
import linecache
import numpy as np
import os
import pandas as pd
import psutil
import sys

from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import StandardScaler
from multiprocessing import Pool

from pdathome.constants import classifiers, columns, descriptives, labels_rename, parameters, \
    participant_ids, paths, updrs_map
from pdathome.evaluation import calculate_sens, calculate_spec

from paradigma.gait_analysis_config import ArmSwingFeatureExtractionConfig
from paradigma.windowing import create_segments, discard_segments, categorize_segments

# Constants

In [2]:
d_map_activities = {
    'Lie-to-sit': 'Transitioning',
    'Lie-to-stand': 'Transitioning',
    'Sit-to-lie': 'Transitioning',
    'Sit-to-stand (low chair/couch)': 'Transitioning',
    'Sit-to-stand (normal chair)': 'Transitioning',
    'Stand-to-lie': 'Transitioning',
    'Stand-to-sit (low chair/couch)': 'Transitioning',
    'Stand-to-sit (normal chair)': 'Transitioning',
    'Walking downstairs': 'Walking the stairs',
    'Walking upstairs': 'Walking the stairs',
}

config = ArmSwingFeatureExtractionConfig()

# Processing

In [20]:
for subject in ['hbv002', 'hbv012', 'hbv053']:# participant_ids.L_PD_IDS + participant_ids.L_HC_IDS:
    d_performance = {}
    
    # clinical info
    if subject in participant_ids.L_PD_IDS:
        df_patient_info = pd.read_pickle(os.path.join(paths.PATH_CLINICAL_DATA, 'df_patient_info_updrs_3.pkl'))
        df_patient_info = df_patient_info.loc[df_patient_info['record_id']==subject]
        df_patient_info['age'] = datetime.datetime.now().year - df_patient_info['year_of_birth']
        df_patient_info['years_since_diagnosis'] = datetime.datetime.now().year - df_patient_info['year_diagnosis']

        for col in ['age', 'years_since_diagnosis']:
            df_patient_info[col] = df_patient_info[col].apply(lambda x: int(x))

        df_patient_info['gender'] = df_patient_info['gender'].apply(lambda x: 'male' if x==1 else 'female')

        d_performance['clinical'] = {}

        for med_stage, med_prefix in zip(['pre', 'post'], ['OFF', 'ON']):
            d_performance['clinical'][med_stage] = {}
            for side in ['right', 'left']:
                updrs_3_hypokinesia_stage_cols = [f'{med_prefix}_{x}' for x in updrs_map.D_UPDRS_PART_3_MAPPING[side]['hypokinesia'].keys()]
                updrs_3_stage_cols = updrs_3_hypokinesia_stage_cols + [f'{med_prefix}_{x}' for x in updrs_map.D_UPDRS_PART_3_MAPPING[side]['tremor'].keys()]
                df_patient_info[f'{med_prefix}_UPDRS_3_hypokinesia_{side}'] = np.sum(df_patient_info.loc[:, updrs_3_hypokinesia_stage_cols], axis=1)
                df_patient_info[f'{med_prefix}_UPDRS_3_total_{side}'] = np.sum(df_patient_info.loc[:, updrs_3_stage_cols], axis=1)

                if subject in participant_ids.L_PD_MOST_AFFECTED_RIGHT:
                    if side == 'right':
                        affected_side = descriptives.MOST_AFFECTED_SIDE
                    else:
                        affected_side = descriptives.LEAST_AFFECTED_SIDE
                else:
                    if side == 'left':
                        affected_side = descriptives.MOST_AFFECTED_SIDE
                    else:
                        affected_side = descriptives.LEAST_AFFECTED_SIDE
                
                d_performance['clinical'][med_stage][affected_side] = {
                    'subscore': np.sum(df_patient_info.loc[:, updrs_3_hypokinesia_stage_cols], axis=1).values[0],
                    'total': np.sum(df_patient_info.loc[:, updrs_3_stage_cols], axis=1).values[0]
                }


    for model in [classifiers.LOGISTIC_REGRESSION, classifiers.RANDOM_FOREST]:

        d_performance[model] = {}
        
        # thresholds
        with open(os.path.join(paths.PATH_THRESHOLDS, 'gait', f'threshold_{model}.txt'), 'r') as f:
            clf_threshold = np.mean(float(f.read()))

        # predictions
        df_predictions = pd.read_pickle(os.path.join(paths.PATH_GAIT_PREDICTIONS, model, f'{subject}.pkl'))

        # TEMPORARY
        df_predictions = df_predictions.rename(columns={'watch_side': 'side'})

        # PREPROCESS DATA
        df_predictions.loc[df_predictions[columns.PRED_GAIT_PROBA]>=clf_threshold, columns.PRED_GAIT] = 1
        df_predictions.loc[df_predictions[columns.PRED_GAIT_PROBA]<clf_threshold, columns.PRED_GAIT] = 0

        # boolean for gait
        df_predictions.loc[df_predictions[columns.FREE_LIVING_LABEL]=='Walking', 'gait_boolean'] = 1
        df_predictions.loc[df_predictions[columns.FREE_LIVING_LABEL]!='Walking', 'gait_boolean'] = 0

        if subject in participant_ids.L_HC_IDS:
            df_predictions[columns.PRE_OR_POST] = 'HC'
        else:
            # boolean for arm swing
            df_predictions.loc[df_predictions[columns.ARM_LABEL]=='Gait without other behaviours or other positions', 'arm_swing_boolean'] = 1
            df_predictions.loc[df_predictions[columns.ARM_LABEL]!='Gait without other behaviours or other positions', 'arm_swing_boolean'] = 0
            df_predictions.loc[df_predictions[columns.ARM_LABEL]=='Holding an object behind ', columns.ARM_LABEL] = 'Holding an object behind'
            df_predictions[columns.ARM_LABEL] = df_predictions.loc[~df_predictions[columns.ARM_LABEL].isna(), columns.ARM_LABEL].apply(lambda x: labels_rename.ARM_ACTIVITY[x])

        # PROCESS DATA

        # make segments and segment duration categories
        for affected_side in [descriptives.MOST_AFFECTED_SIDE, descriptives.LEAST_AFFECTED_SIDE]:
            df_side = df_predictions.loc[df_predictions[columns.SIDE]==affected_side]

            if subject in participant_ids.L_TREMOR_IDS:
                df_ts = pd.read_pickle(os.path.join(paths.PATH_GAIT_FEATURES, f'{subject}_{affected_side}_ts.pkl'))

                df_ts = df_ts.explode(column=[columns.TIME, columns.FREE_LIVING_LABEL, columns.PRE_OR_POST, columns.ARM_LABEL, columns.TREMOR_LABEL])
                df_ts = df_ts.drop_duplicates(subset=[columns.ID, columns.TIME, columns.FREE_LIVING_LABEL, columns.PRE_OR_POST, columns.ARM_LABEL, columns.TREMOR_LABEL])
                df_ts = df_ts.loc[df_ts[columns.PRE_OR_POST].isin(['pre', 'post'])]

                df_ts.loc[df_ts[columns.ARM_LABEL]=='Holding an object behind ', columns.ARM_LABEL] = 'Holding an object behind'
                df_ts[columns.ARM_LABEL] = df_ts.loc[~df_ts[columns.ARM_LABEL].isna(), columns.ARM_LABEL].apply(lambda x: labels_rename.ARM_ACTIVITY[x])

            fpr, tpr, _ = roc_curve(y_true=np.array(df_side['gait_boolean']), y_score=np.array(df_side[columns.PRED_GAIT_PROBA]), pos_label=1)
            roc = auc(fpr, tpr)

            d_performance[model][affected_side] = {
                'sens': calculate_sens(df=df_side, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean'),
                'spec': calculate_spec(df=df_side, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean'),
                'auc': roc
            }

            for med_stage in df_side[columns.PRE_OR_POST].unique():
                df_med_stage = df_side.loc[df_side[columns.PRE_OR_POST]==med_stage].copy()

                fpr, tpr, _ = roc_curve(y_true=np.array(df_med_stage['gait_boolean']), y_score=np.array(df_med_stage[columns.PRED_GAIT_PROBA]), pos_label=1)
                roc = auc(fpr, tpr)

                d_performance[model][affected_side][med_stage] = {
                    'sens': calculate_sens(df=df_med_stage, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean'),
                    'spec': calculate_spec(df=df_med_stage, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean'),
                    'auc': roc,
                    'size': {
                        'gait_s': df_med_stage.loc[df_med_stage['gait_boolean']==1].shape[0] / parameters.DOWNSAMPLED_FREQUENCY,
                        'non_gait_s': df_med_stage.loc[df_med_stage['gait_boolean']==0].shape[0] / parameters.DOWNSAMPLED_FREQUENCY,
                    }
                }

                df_gait = df_med_stage.loc[df_med_stage[columns.FREE_LIVING_LABEL]=='Walking'].copy()

                df_gait[columns.SEGMENT_NR] = create_segments(
                    time_series=df_gait[columns.TIME],
                    minimum_gap_s=config.window_length_s
                )

                # Remove any segments that do not adhere to predetermined criteria
                df_gait = discard_segments(
                    df=df_gait,
                    time_colname=columns.TIME,
                    segment_nr_colname=columns.SEGMENT_NR,
                    minimum_segment_length_s=config.window_length_s
                )

                df_gait[columns.SEGMENT_CAT] = categorize_segments(
                    segment_nr_series=df_gait[columns.SEGMENT_NR],
                    sampling_frequency=parameters.DOWNSAMPLED_FREQUENCY
                )
  
                # minutes of data per med stage, per affected side, per segment duration category
                d_performance[model][affected_side][med_stage]['segment_duration'] = {}
                for segment_duration in df_gait[columns.SEGMENT_CAT].unique():
                    df_segments_cat = df_gait.loc[df_gait[columns.SEGMENT_CAT]==segment_duration]

                    d_performance[model][affected_side][med_stage]['segment_duration'][segment_duration] = {
                        'sens': calculate_sens(df=df_segments_cat, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean'),
                    }

                    d_performance[model][affected_side][med_stage]['segment_duration'][segment_duration]['minutes'] = df_segments_cat.shape[0]/parameters.DOWNSAMPLED_FREQUENCY/60

                    if subject in participant_ids.L_PD_IDS:
                        d_performance[model][affected_side][med_stage]['segment_duration'][segment_duration]['arm_activities'] = {}

                        for arm_label in df_segments_cat[columns.ARM_LABEL].unique():
                            df_arm_activity = df_segments_cat.loc[df_segments_cat[columns.ARM_LABEL]==arm_label]

                            d_performance[model][affected_side][med_stage]['segment_duration'][segment_duration]['arm_activities'][arm_label] = {
                                'mins': df_arm_activity.shape[0],
                                'sens': calculate_sens(df=df_arm_activity, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean')
                            }

                # minutes of data per activity of MAS
                df_med_stage['label_agg'] = df_med_stage[columns.FREE_LIVING_LABEL].apply(lambda x: d_map_activities[x] if x in d_map_activities.keys() else x)
                d_performance[model][affected_side][med_stage]['activities'] = {}

                for activity_label in df_med_stage['label_agg'].unique():
                    df_activity = df_med_stage.loc[df_med_stage['label_agg']==activity_label]
                    d_performance[model][affected_side][med_stage]['activities'][activity_label] = {
                        'spec': calculate_spec(df=df_activity, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean'),
                    }

                # minutes of data per arm activity of MAS
                if subject in participant_ids.L_PD_IDS:
                    d_performance[model][affected_side][med_stage]['arm_activities'] = {}

                    for arm_label in df_med_stage[columns.ARM_LABEL].unique():
                        df_arm_activity = df_med_stage.loc[df_med_stage[columns.ARM_LABEL]==arm_label]

                        d_performance[model][affected_side][med_stage]['arm_activities'][arm_label] = {
                            'mins': df_arm_activity.shape[0],
                            'sens': calculate_sens(df=df_arm_activity, pred_colname=columns.PRED_GAIT, true_colname='gait_boolean')
                        }

                # effect of tremor on specificity
                if subject in participant_ids.L_TREMOR_IDS:

                    df_med_stage = df_side.loc[df_side[columns.PRE_OR_POST]==med_stage].copy()

                    df_tremor = pd.merge(left=df_med_stage, right=df_ts.loc[df_ts[columns.PRE_OR_POST]==med_stage], on=[columns.TIME, columns.FREE_LIVING_LABEL, columns.PRE_OR_POST, columns.ARM_LABEL], how='left')

                    df_tremor['tremor_label_binned'] = df_tremor[columns.TREMOR_LABEL].apply(
                        lambda x: 'tremor' if x in ['Slight or mild tremor', 'Moderate tremor', 'Severe tremor', 'Tremor with significant upper limb activity'] else
                        ('no_tremor' if x in ['No tremor', 'Periodic activity of hand/arm similar frequency to tremor', 'No tremor with significant upper limb activity'] else
                        np.nan
                        )
                    )

                    for tremor_type in [x for x in df_tremor['tremor_label_binned'].unique() if not pd.isna(x)]:
                        d_performance[model][affected_side][med_stage][f'{tremor_type}_spec'] = calculate_spec(df=df_tremor.loc[df_tremor['tremor_label_binned']==tremor_type], pred_colname=columns.PRED_GAIT, true_colname='gait_boolean')


    print(f"Time {datetime.datetime.now()} - {subject} - Finished.")

Time 2024-09-19 09:22:12.385619 - hbv002 - Finished.
Time 2024-09-19 09:22:44.826350 - hbv012 - Finished.


In [26]:
d_performance['rf']['MAS']['pre']

{'sens': 0.8649262912070728,
 'spec': 0.8951831465477754,
 'auc': 0.9625079066622186,
 'size': {'gait_s': 479.59, 'non_gait_s': 5239.52},
 'segment_duration': {2: {'sens': 0.8091131000813669,
   'minutes': 1.0241666666666667,
   'arm_activities': {'Gait without other arm movements': {'mins': 1645,
     'sens': 1.0},
    'Grabbing': {'mins': 791, 'sens': 0.8938053097345132},
    'Holding forward': {'mins': 2195, 'sens': 0.7189066059225513},
    'Opening by pulling': {'mins': 112, 'sens': 1.0},
    'Closing by pulling': {'mins': 400, 'sens': 0.4225},
    'Holding forward and rotating': {'mins': 114, 'sens': 0.0},
    'Hand on trouser pocket': {'mins': 299, 'sens': 1.0},
    'Patting pet': {'mins': 150, 'sens': 1.0},
    'Hands forward': {'mins': 201, 'sens': 1.0},
    'Hand on chest': {'mins': 238, 'sens': 0.46638655462184875}}},
  3: {'sens': 0.8207561267184699,
   'minutes': 2.2306666666666666,
   'arm_activities': {'Grabbing': {'mins': 2295, 'sens': 0.4309368191721133},
    'Holding f

In [19]:
df_ts

Unnamed: 0,id,time,segment_nr,window_nr,free_living_label,pre_or_post,arm_label,tremor_label
0,hbv012,2278.19,1,1,Sitting,pre,Not gait,No tremor
0,hbv012,2278.2,1,1,Sitting,pre,Not gait,No tremor
0,hbv012,2278.21,1,1,Sitting,pre,Not gait,No tremor
0,hbv012,2278.22,1,1,Sitting,pre,Not gait,No tremor
0,hbv012,2278.23,1,1,Sitting,pre,Not gait,No tremor
...,...,...,...,...,...,...,...,...
9389,hbv012,13450.49,20,1476,Sitting,post,Not gait,
9389,hbv012,13450.5,20,1476,Sitting,post,Not gait,
9389,hbv012,13450.51,20,1476,Sitting,post,Not gait,
9389,hbv012,13450.52,20,1476,Sitting,post,Not gait,
