# Combining multi-modal data for EMA validation with UPDRS and Ephys

## 0. Import packages

- document versions for reproducibility

In [None]:
# import packages
import pandas as pd
import numpy as np
import os
import sys
import csv
import json
import importlib
from datetime import datetime as dt
from datetime import timedelta as td
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

In [None]:
# Report interpreter and core package versions for reproducibility
version_report = (
    ('Python sys', sys.version),
    ('pandas', pd.__version__),
    ('numpy', np.__version__),
)
for package_label, package_version in version_report:
    print(package_label, package_version)

# Not reported yet (kept as reminders, the referenced names are not imported here):
# print('mne_bids', mne_bids.__version__)
# print('mne', mne.__version__)
# print('sci-py', scipy.__version__)
# print('sci-kit learn', sk.__version__)
# print('matplotlib', plt_version)

In [None]:
from utils import load_utils, load_data
from PerceiveImport.classes import main_class

In [None]:
# FOR DEBUGGING
# Re-import the already-loaded load_data module so that edits made to its
# source are picked up in this session
importlib.reload(load_data)

## 1. Import Data

In [None]:
# Condition code passed to the single-condition EMA/UPDRS import and to the
# LFP import below; the multi-condition cells use the codes
# 'm0s0', 'm0s1', 'm1s0', 'm1s1'
CONDITION = 'm0s0'

Import EMA and UPDRS

In [None]:
# SINGLE CONDITION
# Load the EMA and UPDRS data for the condition selected in CONDITION
ema_df, updrs_df = load_data.get_EMA_UPDRS_data(condition=CONDITION)


In [None]:
# 4 CONDITIONS
# Collect the EMA and UPDRS data of every condition, keyed by condition code
EMA = {}
UPDRS = {}

for COND in ('m0s0', 'm0s1', 'm1s0', 'm1s1'):
    EMA[COND], UPDRS[COND] = load_data.get_EMA_UPDRS_data(condition=COND)


Import LFP data

In [None]:
ids = load_data.get_ids()

# EMA ids that are skipped in the loop below (not percept ready)
# ema11, percept 088 -> no streaming?
ids_not_percept_processed = [
    'ema06', 'ema10', 'ema11', 'ema12', 'ema14', 'ema15', 'ema16',
]

# LFP data per EMA id, filled only for ids that are not skipped
lfp_data = {}

for ema_id in ema_df.index:
    percept_ready = ema_id not in ids_not_percept_processed
    if not percept_ready:
        print(f'\n#### SKIP {ema_id}, not percept ready ####\n')
        continue

    # Look up the Percept subject and session belonging to this EMA id
    id_row = ids.loc[ema_id]
    sub, ses = id_row['prc_id'], id_row['prc_ses']

    print(f'\nGET LFP {ema_id}, {sub}, {ses}')

    # Known issue (TODO): the run column in the metadata table sometimes
    # contains a float such as 1.0, and then the data cannot be loaded, even
    # after converting the cells to "Text" in Excel.
    # Examples: sub032 or sub038, survey, fu3m.
    # TODO: add use_bids=True once that functionality exists.
    sub_data = main_class.PerceiveData(
        sub=sub,
        incl_modalities=['streaming'],
        incl_session=[ses],
        incl_condition=[CONDITION],
        incl_task=['rest'],
        # set to True to additionally load the corresponding JSON source files
        import_json=False,
        # True warns about metadata-table rows containing NaNs; make sure all
        # columns are filled in for the file you want to load
        warn_for_metaNaNs=True,
        allow_NaNs_in_metadata=True,
    )

    # Walk down to streaming -> session -> condition -> rest -> run1
    session_data = getattr(sub_data.streaming, ses)
    rest_run = getattr(session_data, CONDITION).rest.run1
    dat = rest_run.data.get_data()

    lfp_data[ema_id] = dat

In [None]:
# Show the EMA ids for which LFP data was stored
lfp_data.keys()

Select subscore data

In [None]:
# Get the selector for the 'brady' subscore items in the EMA data and
# show the keys it picks out
sel = load_data.get_subscores(ema_df, score_type='brady',)
ema_df.keys()[sel]

In [None]:
# Get the selector for the 'brady' subscore items in the UPDRS data and
# show the keys it picks out
sel = load_data.get_subscores(updrs_df, score_type='brady',)
updrs_df.keys()[sel]

Get SUM values per condition per subject

In [None]:
# One row per id; columns '<EMA|UPDRS>_SUM_<condition>' hold the summed
# 'brady' subscore items of that scale and condition
SUMS = pd.DataFrame(index=ids.index)

for COND in ['m0s0', 'm0s1', 'm1s0', 'm1s1']:
    # Same procedure for both scales: select the subscore columns, sum per row
    for scale_name, scale_data in (('EMA', EMA[COND]), ('UPDRS', UPDRS[COND])):
        sel_bool = load_data.get_subscores(scale_data, score_type='brady')
        sel_cols = scale_data.keys()[sel_bool]

        sum_col = np.sum(scale_data[sel_cols], axis=1)
        SUMS[f'{scale_name}_SUM_{COND}'] = sum_col

# A sum of exactly 0 is treated as missing data.
# NOTE(review): presumably rows without any scores sum to 0 here; a genuine
# all-zero score would be discarded as well - confirm this is intended.
nan_sel = SUMS.values == 0

# np.nan instead of np.NaN: the np.NaN alias was removed in NumPy 2.0
SUMS[nan_sel] = np.nan


In [None]:
# ADD individual mean over conditions
# Only the raw '<scale>_SUM_<condition>' columns are used: the '*_corr'
# columns created further down also start with '<scale>_SUM', so they are
# excluded explicitly. This keeps the cell safe to re-run (no contaminated
# means, no '*_corr_corr' columns).
for m in ['EMA', 'UPDRS']:
    sum_cols = [
        k for k in SUMS.keys()
        if k.startswith(f'{m}_SUM') and not k.endswith('_corr')
    ]
    # Row-wise mean ignoring NaNs; rows with only NaNs stay NaN
    SUMS[f'{m}_indMean'] = SUMS[sum_cols].mean(axis=1)

# Correct sums with individual means
for m in ['EMA', 'UPDRS']:
    # Fix the list of raw sum columns before adding new columns
    sum_cols = [
        k for k in SUMS.keys()
        if k.startswith(f'{m}_SUM') and not k.endswith('_corr')
    ]
    for k in sum_cols:
        SUMS[f'{k}_corr'] = SUMS[k] - SUMS[f'{m}_indMean']

In [None]:
# Pool the mean-corrected sums of all conditions into two flat lists:
# x holds the EMA values, y the UPDRS values, in the same condition order
pooled_conditions = ['m0s0', 'm0s1', 'm1s0', 'm1s1']

x = [
    score
    for cond_code in pooled_conditions
    for score in SUMS[f'EMA_SUM_{cond_code}_corr']
]
y = [
    score
    for cond_code in pooled_conditions
    for score in SUMS[f'UPDRS_SUM_{cond_code}_corr']
]


In [None]:
# Scatter of the pooled x (EMA) against y (UPDRS) values, with reference
# lines at zero on both axes
plt.scatter(x, y)
plt.axhline(y=0, c='gray', alpha=0.3)
plt.axvline(x=0, c='gray', alpha=0.3)

# Drop a pair whenever EITHER value is NaN. Filtering x and y independently
# would shift the remaining values against each other (or leave lists of
# different length) as soon as a NaN occurs in only one of the two.
x_arr = np.asarray(x, dtype=float)
y_arr = np.asarray(y, dtype=float)
complete_pairs = ~(np.isnan(x_arr) | np.isnan(y_arr))

R, p = spearmanr(x_arr[complete_pairs], y_arr[complete_pairs])

plt.title(f'Spearman R: {R.round(2)}, p={p.round(5)}')
plt.xlabel('EMA corrected vs indiv-mean (max score 10)')
plt.ylabel('UPDRS corrected vs indiv-mean (max score 28)')
plt.show()