# Modules

In [2]:
import datetime
import functools
import json
import linecache
import numpy as np
import os
import pandas as pd
import psutil
import sys

from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import StandardScaler
from multiprocessing import Pool

from pdathome.constants import *
from pdathome.evaluation import calculate_sens, calculate_spec

# Constants

In [None]:
# Free-living activity labels that are aggregated into a coarser category.
# Postural changes are pooled as 'Transitioning', stair walking in either
# direction as 'Walking the stairs'.
_L_TRANSITION_LABELS = [
    'Lie-to-sit',
    'Lie-to-stand',
    'Sit-to-lie',
    'Sit-to-stand (low chair/couch)',
    'Sit-to-stand (normal chair)',
    'Stand-to-lie',
    'Stand-to-sit (low chair/couch)',
    'Stand-to-sit (normal chair)',
]
_L_STAIR_LABELS = [
    'Walking downstairs',
    'Walking upstairs',
]

# Lookup from the original activity label to its aggregated label.
d_map_activities = {
    **{label: 'Transitioning' for label in _L_TRANSITION_LABELS},
    **{label: 'Walking the stairs' for label in _L_STAIR_LABELS},
}

# Processing

In [None]:
# Per-subject evaluation of the gait classifiers.
#
# For every subject a fresh `d_performance` dict is filled with
#   - 'clinical' (PD subjects only): UPDRS part 3 hypokinesia subscore and total
#     per medication stage ('pre'/'post') and per side ('MAS'/'LAS');
#   - one entry per model: sensitivity, specificity and AUC per affected side and
#     per medication stage, further broken down by gait sequence duration,
#     aggregated activity label, arm activity (PD subjects only) and tremor
#     presence (subjects in L_TREMOR_IDS only).
#
# NOTE(review): `d_performance` is rebuilt for each subject and is not written
# anywhere in the visible part of this cell - confirm it is persisted elsewhere.


@functools.lru_cache(maxsize=None)
def _load_patient_info():
    """Read the UPDRS part 3 patient-info table; cached so it is read only once."""
    return pd.read_pickle(os.path.join(path_patient_info, 'df_patient_info_updrs_3.pkl'))


@functools.lru_cache(maxsize=None)
def _load_gait_threshold(model):
    """Read the stored gait classification threshold of `model`; cached per model."""
    with open(os.path.join(PATH_THRESHOLDS, 'gait', f'threshold_{model}.txt'), 'r') as f:
        return float(f.read())


def _summarise_arm_activities(df):
    """Return {arm_label: {'mins', 'sens'}} for every arm label occurring in `df`.

    'mins' is the number of rows converted to minutes (rows / fs / 60), the same
    conversion that is used for the 'minutes' entry of the sequence durations.
    """
    d_arm_activities = {}
    for arm_label in df['arm_label'].unique():
        df_arm_activity = df.loc[df['arm_label'] == arm_label]
        d_arm_activities[arm_label] = {
            'mins': df_arm_activity.shape[0] / fs / 60,
            'sens': calculate_sens(df_arm_activity),
        }
    return d_arm_activities


# Binning of the annotated tremor labels; labels that are not listed map to NaN.
_D_TREMOR_BINS = {
    'Slight or mild tremor': 'tremor',
    'Moderate tremor': 'tremor',
    'Severe tremor': 'tremor',
    'Tremor with significant upper limb activity': 'tremor',
    'No tremor': 'no_tremor',
    'Periodic activity of hand/arm similar frequency to tremor': 'no_tremor',
    'No tremor with significant upper limb activity': 'no_tremor',
}

# NOTE(review): age and years since diagnosis are relative to the year in which
# this cell is executed, not to the year of measurement.
current_year = datetime.datetime.now().year

for subject in L_PD_IDS + L_HC_IDS:
    d_performance = {}

    # clinical info
    if subject in L_PD_IDS:
        df_patient_info = _load_patient_info()
        # .copy() gives an independent frame: the cached table stays untouched and
        # the column assignments below are not chained assignments on a slice.
        df_patient_info = df_patient_info.loc[df_patient_info['record_id'] == subject].copy()
        df_patient_info['age'] = current_year - df_patient_info['year_of_birth']
        df_patient_info['years_since_diagnosis'] = current_year - df_patient_info['year_diagnosis']

        for col in ['age', 'years_since_diagnosis']:
            df_patient_info[col] = df_patient_info[col].astype(int)

        # every value other than 1 (including missing values) is labelled 'female'
        df_patient_info['gender'] = np.where(df_patient_info['gender'] == 1, 'male', 'female')

        d_performance['clinical'] = {}

        for med_stage, med_prefix in zip(['pre', 'post'], ['OFF', 'ON']):
            d_performance['clinical'][med_stage] = {}
            for side in ['right', 'left']:
                updrs_3_hypokinesia_stage_cols = [f'{med_prefix}_{x}' for x in D_UPDRS_PART_3_MAPPING[side]['hypokinesia'].keys()]
                updrs_3_stage_cols = updrs_3_hypokinesia_stage_cols + [f'{med_prefix}_{x}' for x in D_UPDRS_PART_3_MAPPING[side]['tremor'].keys()]

                # row-wise sums, computed once and reused for the table and the dict
                hypokinesia_score = df_patient_info.loc[:, updrs_3_hypokinesia_stage_cols].sum(axis=1)
                total_score = df_patient_info.loc[:, updrs_3_stage_cols].sum(axis=1)
                df_patient_info[f'{med_prefix}_UPDRS_3_hypokinesia_{side}'] = hypokinesia_score
                df_patient_info[f'{med_prefix}_UPDRS_3_total_{side}'] = total_score

                # 'MAS' when `side` is the subject's most affected side, else 'LAS':
                # right for subjects in L_PD_MOST_AFFECTED_RIGHT, left for all others
                side_is_most_affected = (side == 'right') == (subject in L_PD_MOST_AFFECTED_RIGHT)
                affected_side = 'MAS' if side_is_most_affected else 'LAS'

                # .values[0]: the table was filtered on this subject's record_id
                d_performance['clinical'][med_stage][affected_side] = {
                    'subscore': hypokinesia_score.values[0],
                    'total': total_score.values[0],
                }

    for model in [LOGISTIC_REGRESSION, RANDOM_FOREST]:

        d_performance[model] = {}

        # thresholds
        clf_threshold = _load_gait_threshold(model)

        # predictions
        df_predictions = pd.read_pickle(os.path.join(PDH_PATH_GAIT_PREDICTIONS, model, f'{subject}.pkl'))

        # PREPROCESS DATA
        df_predictions.loc[df_predictions['pred_gait_proba']>=clf_threshold, 'pred_gait_rounded'] = 1
        df_predictions.loc[df_predictions['pred_gait_proba']<clf_threshold, 'pred_gait_rounded'] = 0

        # boolean for gait
        df_predictions.loc[df_predictions['free_living_label']=='Walking', 'gait_boolean'] = 1
        df_predictions.loc[df_predictions['free_living_label']!='Walking', 'gait_boolean'] = 0

        if subject in L_HC_IDS:
            df_predictions['pre_or_post'] = 'HC'
        else:
            # boolean for arm swing
            df_predictions.loc[df_predictions['arm_label']=='Gait without other behaviours or other positions', 'arm_swing_boolean'] = 1
            df_predictions.loc[df_predictions['arm_label']!='Gait without other behaviours or other positions', 'arm_swing_boolean'] = 0
            # strip the trailing space of this label before it is looked up in the rename dict
            df_predictions.loc[df_predictions['arm_label']=='Holding an object behind ', 'arm_label'] = 'Holding an object behind'
            # only non-missing labels are renamed; missing labels stay missing
            df_predictions['arm_label'] = df_predictions.loc[~df_predictions['arm_label'].isna(), 'arm_label'].apply(lambda x: D_ARM_LABELS_RENAME[x])

        # PROCESS DATA

        # make segments and segment duration categories
        for affected_side in [MOST_AFFECTED_SIDE, LEAST_AFFECTED_SIDE]:
            df_side = df_predictions.loc[df_predictions['affected_side']==affected_side]

            if subject in L_TREMOR_IDS:
                df_ts = pd.read_pickle(os.path.join(PDH_PATH_ACC_FEATURES_DATA, f'{subject}_{affected_side}_ts.pkl'))

                # the listed columns hold list-likes; explode them into one row per element
                df_ts = df_ts.explode(column=['t', 'free_living_label', 'pre_or_post', 'arm_label', 'tremor_label'])
                df_ts = df_ts.drop_duplicates(subset=['id', 't', 'free_living_label', 'pre_or_post', 'arm_label', 'tremor_label'])
                df_ts = df_ts.loc[df_ts['pre_or_post'].isin(['pre', 'post'])]

                # same arm label clean-up as applied to the predictions
                df_ts.loc[df_ts['arm_label']=='Holding an object behind ', 'arm_label'] = 'Holding an object behind'
                df_ts['arm_label'] = df_ts.loc[~df_ts['arm_label'].isna(), 'arm_label'].apply(lambda x: D_ARM_LABELS_RENAME[x])

            # overall performance of this side, over all medication stages
            fpr, tpr, _ = roc_curve(y_true=np.array(df_side['gait_boolean']), y_score=np.array(df_side['pred_gait_proba']), pos_label=1)
            roc = auc(fpr, tpr)

            d_side = d_performance[model][affected_side] = {
                'sens': calculate_sens(df_side),
                'spec': calculate_spec(df_side),
                'auc': roc
            }

            for med_stage in df_side['pre_or_post'].unique():
                df_med_stage = df_side.loc[df_side['pre_or_post']==med_stage].copy()

                fpr, tpr, _ = roc_curve(y_true=np.array(df_med_stage['gait_boolean']), y_score=np.array(df_med_stage['pred_gait_proba']), pos_label=1)
                roc = auc(fpr, tpr)

                # rows / fs is stored as seconds of data
                d_stage = d_side[med_stage] = {
                    'sens': calculate_sens(df_med_stage),
                    'spec': calculate_spec(df_med_stage),
                    'auc': roc,
                    'size': {
                        'gait_s': df_med_stage.loc[df_med_stage['gait_boolean']==1].shape[0] / fs,
                        'non_gait_s': df_med_stage.loc[df_med_stage['gait_boolean']==0].shape[0] / fs,
                    }
                }

                df_gait = df_med_stage.loc[df_med_stage['free_living_label']=='Walking'].copy()

                df_seq, _ = group_sequences(df=df_gait, min_time_gap_s=seq_gap_s, time_colname=TIME_COLUMN, fs=fs)

                # minutes of data per med stage, per affected side, per segment duration category
                d_seq_durations = d_stage['seq_duration'] = {}
                for sequence_duration in df_seq['sequence_duration_category'].unique():
                    df_seq_cat = df_seq.loc[df_seq['sequence_duration_category']==sequence_duration]

                    d_seq = d_seq_durations[sequence_duration] = {
                        'sens': calculate_sens(df_seq_cat),
                        'minutes': df_seq_cat.shape[0] / fs / 60,
                    }

                    if subject in L_PD_IDS:
                        d_seq['arm_activities'] = _summarise_arm_activities(df_seq_cat)

                # specificity per (aggregated) activity label
                df_med_stage['label_agg'] = df_med_stage['free_living_label'].apply(lambda x: d_map_activities.get(x, x))
                d_stage['activities'] = {}

                for activity_label in df_med_stage['label_agg'].unique():
                    df_activity = df_med_stage.loc[df_med_stage['label_agg']==activity_label]
                    d_stage['activities'][activity_label] = {
                        'spec': calculate_spec(df_activity)
                    }

                # minutes of data and sensitivity per arm activity
                if subject in L_PD_IDS:
                    d_stage['arm_activities'] = _summarise_arm_activities(df_med_stage)

                # effect of tremor on specificity
                if subject in L_TREMOR_IDS:

                    # fresh copy of the stage data, without the 'label_agg' column added above
                    df_med_stage = df_side.loc[df_side['pre_or_post']==med_stage].copy()

                    df_tremor = pd.merge(left=df_med_stage, right=df_ts.loc[df_ts['pre_or_post']==med_stage], on=[ID_COLUMN, TIME_COLUMN, 'free_living_label', 'pre_or_post', 'arm_label'], how='left')

                    # unlisted or missing tremor labels become NaN and are skipped below
                    df_tremor['tremor_label_binned'] = df_tremor['tremor_label'].apply(lambda x: _D_TREMOR_BINS.get(x, np.nan))

                    for tremor_type in [x for x in df_tremor['tremor_label_binned'].unique() if not pd.isna(x)]:
                        d_stage[f'{tremor_type}_spec'] = calculate_spec(df_tremor.loc[df_tremor['tremor_label_binned']==tremor_type])


    print(f"Time {datetime.datetime.now()} - {subject} - Finished.")