# In-clinic data to validate EMA with UPDRS

## 0. Import packages

- document versions for reproducibility

In [1]:
# import packages
import pandas as pd
import numpy as np
import os
import sys
import csv
import json
import importlib
from itertools import product, compress
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
from scipy.signal import welch

In [2]:
print('Python sys', sys.version)
print('pandas', pd.__version__)
print('numpy', np.__version__)
# print('mne_bids', mne_bids.__version__)
# print('mne', mne.__version__)
# print('sci-py', scipy.__version__)
# print('sci-kit learn', sk.__version__)
# print('matplotlib', plt_version)

"""
Python sys 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
pandas 2.1.1
numpy 1.26.0
"""

Python sys 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
pandas 2.3.2
numpy 2.3.3


'\nPython sys 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]\npandas 2.1.1\nnumpy 1.26.0\n'

In [3]:
from utils import load_utils, load_data, prep_data
# from PerceiveImport.classes import main_class

PyPerceive folder added: c:\Users\habetsj\Research\projects\PyPerceive\code


In [4]:
# FOR DEBUGGING
importlib.reload(load_data)
importlib.reload(load_utils)

<module 'utils.load_utils' from 'c:\\Users\\habetsj\\Research\\projects\\EMA_validation\\EMA_clinic_validation\\code\\utils\\load_utils.py'>

In [5]:
figpath = load_utils.get_onedrive_path('emaval_fig')


## 1. Import Data

### 1.1 Import EMA and UPDRS

In [None]:
# # SINGLE CONDITION
# CONDITION = 'm0s0'

# ema_df, updrs_df = load_data.get_EMA_UPDRS_data(condition=CONDITION)


In [None]:
importlib.reload(load_data)
importlib.reload(load_utils)


# list of IDs to exclude bcs data is still missing
excl_ids = []  # 'ema31', 'ema32', 'ema33', 'ema34'

# 4 CONDITIONS
EMA, UPDRS = {}, {}

for COND in ['m0s0', 'm0s1', 'm1s0', 'm1s1']:
    ema_temp, updrs_temp = load_data.get_EMA_UPDRS_data(
        condition=COND, CONVERT_SCORES=True,
    )
    EMA[COND] = ema_temp
    UPDRS[COND] = updrs_temp

    # print(f'EMA ids: {EMA[COND]["study_id"]}')
    # print(f'UPDRS ids: {UPDRS[COND]["study_id"]}')

    for ema_n_excl in excl_ids:
        if ema_n_excl in EMA[COND]['study_id'].values:
            drop_idx = np.where(EMA[COND]['study_id'] == ema_n_excl)[0][0]
            EMA[COND] = EMA[COND].drop(drop_idx).reset_index(drop=True)
            print(f'drop {ema_n_excl} in EMA, index: {drop_idx}')
        if ema_n_excl in UPDRS[COND]['study_id'].values:
            drop_idx = np.where(UPDRS[COND]['study_id'] == ema_n_excl)[0][0]
            UPDRS[COND] = UPDRS[COND].drop(drop_idx).reset_index(drop=True)
            print(f'drop {ema_n_excl} in UPDRS, index: {drop_idx}')
        

## 2. Preprocess data

#### Get (mean-corrected) EMA and UPDRS values per symptom subtype

In [None]:
importlib.reload(load_data)

importlib.reload(prep_data)


sumdf = prep_data.get_sum_df(EMA_dict=EMA, UPDRS_dict=UPDRS,
                             MEAN_CORR=True,)

# sumdf

Split in Training and Test Cohorts

In [None]:
# SPLIT DATA IN TRAIN AND TEST

train_subs, test_subs = prep_data.get_train_test_split(sumdf)

traindf = sumdf.loc[[i for i in sumdf.index if i in train_subs]]

testdf = sumdf.loc[[i for i in sumdf.index if i in test_subs]]

print(traindf.shape, testdf.shape)

## Statistical comparison of training and test cohort

In [80]:
from scipy.stats import mannwhitneyu

In [103]:
fig, axes = plt.subplots(1, len(traindf.keys()), 
                         sharey='row', figsize=(12, 3))

yticks = [-10, -5, 0, 5, 10]

for i_ft, ft in enumerate(list(testdf.keys())):

    train = traindf[ft].values
    train = train[~np.isnan(train)]
    test = testdf[ft].values
    test = test[~np.isnan(test)]

    axes[i_ft].boxplot([train, test])
    axes[i_ft].set_xlabel(f'#{i_ft+1}')

    # do stats
    stat, p = mannwhitneyu(train, test)
    print(f'Variable {i_ft+1}, {ft}: stat: {stat}, p={round(p, 4)}')

for ax in axes:
    ax.set_xticks([])
    ax.set_yticks([])
    ax.spines[['right', 'top', 'left', 'bottom']].set_visible(False)
    for y in yticks: ax.axhline(y, alpha=.3, color='gray',)

axes[0].set_yticks(yticks)
axes[0].set_yticklabels(yticks)
axes[0].set_ylabel('variable change (points)')

plt.suptitle('Intra-individual changes in EMA / UPDRS per therapy condition: training vs test cohort')

plt.tight_layout()


plt.savefig(os.path.join(figpath, 'train_data', 'training_vs_test_vars'), dpi=300,
            facecolor='w',)

plt.close()

Variable 1, EMA_SUM_brady_m0s0: stat: 78.0, p=0.4454
Variable 2, EMA_SUM_tremor_m0s0: stat: 97.0, p=0.9825
Variable 3, EMA_SUM_gait_m0s0: stat: 92.5, p=0.8958
Variable 4, EMA_SUM_nonmotor_m0s0: stat: 81.5, p=0.5407
Variable 5, UPDRS_SUM_brady_m0s0: stat: 80.5, p=0.5129
Variable 6, UPDRS_SUM_tremor_m0s0: stat: 80.0, p=0.4962
Variable 7, UPDRS_SUM_gait_m0s0: stat: 84.5, p=0.6263
Variable 8, EMA_SUM_brady_m0s1: stat: 123.0, p=0.1674
Variable 9, EMA_SUM_tremor_m0s1: stat: 85.5, p=0.7852
Variable 10, EMA_SUM_gait_m0s1: stat: 125.0, p=0.1403
Variable 11, EMA_SUM_nonmotor_m0s1: stat: 117.0, p=0.2669
Variable 12, UPDRS_SUM_brady_m0s1: stat: 111.5, p=0.3908
Variable 13, UPDRS_SUM_tremor_m0s1: stat: 118.5, p=0.2375
Variable 14, UPDRS_SUM_gait_m0s1: stat: 92.0, p=1.0
Variable 15, EMA_SUM_brady_m1s0: stat: 43.0, p=0.3238
Variable 16, EMA_SUM_tremor_m1s0: stat: 36.5, p=0.7287
Variable 17, EMA_SUM_gait_m1s0: stat: 34.5, p=0.882
Variable 18, EMA_SUM_nonmotor_m1s0: stat: 47.0, p=0.1671
Variable 19, UP

## Explore EMA x UPDRS correlations

In [None]:
from scipy.stats import ttest_rel, pearsonr

In [None]:
def scatter_EMA_UPDRS(
    ax, dat_df,
    EMA_subscore = 'brady',
    UPDRS_subscore = 'brady',
    show_updrs_improve=True,
):

    ema_values, updrs_values = [], []

    for COND in ['m0s0', 'm0s1', 'm1s0', 'm1s1']:

        ema_v = dat_df[f'EMA_SUM_{EMA_subscore}_{COND}']
        updrs_v = dat_df[f'UPDRS_SUM_{UPDRS_subscore}_{COND}']

        nan_sel = np.logical_or(pd.isna(ema_v), pd.isna(updrs_v))
        ema_v = ema_v[~nan_sel]
        updrs_v = updrs_v[~nan_sel]

        ema_values.extend(ema_v)
        updrs_values.extend(updrs_v)

    # plot UPDRS clinical IMPROVEMENT
    if show_updrs_improve:
        updrs_values = np.array(updrs_values) * -1
        ax.set_xlabel(f'UPDRS-improvement {UPDRS_subscore}\n(high: less symptoms)')
    
    else:
        ax.set_xlabel(f'UPDRS {UPDRS_subscore}\n(low: less symptoms)')


    ax.scatter(updrs_values, ema_values)
    ax.axhline(y=0, c='gray', alpha=0.3)
    ax.axvline(x=0, c='gray', alpha=0.3)

    R, pval = pearsonr(
        [x for x in updrs_values if not np.isnan(x)],
        [y for y in ema_values if not np.isnan(y)]
    )

    ax.set_title(f'{EMA_subscore}  R: {R.round(2)}, p={pval.round(5)}')
    ax.set_ylabel(f'EMA {EMA_subscore}\n(high: less symptoms)')
    
    return ax

In [None]:
figname = 'motor_corr_meanCorrvalues_2411'

fig, axes = plt.subplots(1, 3, figsize=(12, 4))

for ax, subscore in zip(axes, ['brady', 'tremor', 'gait']):

    ax = scatter_EMA_UPDRS(
        ax=ax, dat_df=traindf,
        EMA_subscore=subscore,
        UPDRS_subscore=subscore,
        show_updrs_improve=True,
    )

plt.tight_layout()

plt.savefig(os.path.join(figpath, 'train_data', 'ema_updrs_corr', figname), dpi=300,
            facecolor='w',)

plt.show()

Stats ema x updrs

In [None]:
print(traindf.columns)

In [None]:
importlib.reload(prep_data)


lmm_df = prep_data.get_lmm_df(traindf)

print(lmm_df.values.shape)

print(lmm_df.columns)

print(lmm_df.dtypes)

In [None]:
import utils.stats as utilsstat
import statsmodels.formula.api as smf


In [None]:
importlib.reload(utilsstat)

# set target motor symptom
motor_target = 'brady'

lmm_fix = {
    'single_motor': f"EMA_SUM_{motor_target} + EMA_SUM_nonmotor",
    'all_motor': (
        "EMA_SUM_brady + EMA_SUM_tremor + "
        "EMA_SUM_gait + EMA_SUM_nonmotor"
    )
}

FIX_EFF = 'all_motor'

# Random intercepts only
model = smf.mixedlm(
    f"UPDRS_SUM_{motor_target} ~ {lmm_fix[FIX_EFF]}",
    lmm_df,
    groups=lmm_df["subid"],
    # re_formula=f"~EMA_SUM_{motor_target}",  # for random slopes for EMA motor
)
result = model.fit()
print(result.summary())

## calculate explained variances
R2_marg, R2_cond = utilsstat.calc_expl_variances(fitted_model=result)

print(f"for {motor_target}: R2_marginal {np.round(R2_marg, 3)},"
      f"R2_conditional: {np.round(R2_cond, 3)}")



Show individual differences in EMA-point vs UPDRS-change

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4),
                         sharey=True, )

for i_trg, target in enumerate(['brady', 'tremor', 'gait']):

    id_coefs = []

    for subid in np.unique(lmm_df['subid']):

        x = lmm_df[f'EMA_SUM_{target}'][lmm_df['subid'] == subid]
        y = lmm_df[f'UPDRS_SUM_{target}'][lmm_df['subid'] == subid]

        try:
            z = np.polyfit(x, y, 1)
            coef = z[0]
        except:
            if all(x == y): coef = 0

        id_coefs.append(coef)
        # plt.scatter(x, y)
        xplot = np.arange(5)
        # plt.plot(xplot, xplot * coef)

    # plt.show()


    axes[i_trg].hist(id_coefs)
    axes[i_trg].set_xlabel('delta UPDRS point / EMA point')
    axes[i_trg].set_ylabel(f'Observations for {target} (n subjects)')

plt.show()

## Hold-out Validation: UPDRS prediction based on EMA

In [None]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import r2_score, confusion_matrix
from scipy.stats import f


In [None]:
testdf

In [114]:
def get_holdout_results(traindf, testdf,
                        target, FEAT_SEL,
                        classif='linreg',
                        verbose=False,
                        SHUFFLE=False,
                        PERM_SEED=False):
    """
    returns:
        - y_true
        - y_pred
        - R2
        - F-test
    """
    # print(f'\n{target} prediction, {FEAT_SEL}-variable')
    X_col_sel = {
        'single': [f'EMA_SUM_{target}'],
        'multi': ['EMA_SUM_brady', 'EMA_SUM_tremor',
                  'EMA_SUM_gait', 'EMA_SUM_nonmotor']
    }


    # train cohort
    train_pred_df = prep_data.get_lmm_df(traindf)  # create sample wise df
    X_train = train_pred_df[X_col_sel[FEAT_SEL]].values
    if target == 'updrs_sum':
        # predict motor UPDRS change
        y_train = np.sum(train_pred_df[['UPDRS_SUM_brady', 'UPDRS_SUM_tremor',
                                'UPDRS_SUM_gait']], axis=1).values
    else:
        y_train = train_pred_df[f'UPDRS_SUM_{target}'].values
    
    if SHUFFLE:
        np.random.seed(PERM_SEED)
        np.random.shuffle(y_train)

    # test cohort
    test_pred_df = prep_data.get_lmm_df(testdf)  # create sample wise df
    X_test = test_pred_df[X_col_sel[FEAT_SEL]].values
    if target == 'updrs_sum':
        y_true = np.sum(test_pred_df[['UPDRS_SUM_brady', 'UPDRS_SUM_tremor',
                                  'UPDRS_SUM_gait']], axis=1)
    else:
        y_true = test_pred_df[f'UPDRS_SUM_{target}'].values
    
    # Run prediction
    models = {'linreg': LinearRegression(),
              'lda': LDA(),
              'logreg': LogisticRegression()}
    model = models[classif]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Evauate predictions

    # F-statistic for model usability
    
    # Sum of squares
    SSR = np.sum((y_pred - np.mean(y_true)) ** 2)   # Regression
    SSE = np.sum((y_true - y_pred) ** 2)   # Error
    SST = np.sum((y_true - np.mean(y_true)) ** 2)   # Total

    # Degrees of freedom
    df_reg = X_test.shape[1]
    df_err = len(y_true) - (X_test.shape[1] + 1)  # +1 for coeff next to betas of features

    # Mean squares
    MSR = SSR / df_reg if df_reg > 0 else np.nan
    MSE = SSE / df_err if df_err > 0 else np.nan
    F = MSR / MSE
    # p-value for the observed F
    f_pval = 1 - f.cdf(F, df_reg, df_err)
    # critical F threshold at alpha
    f_crit = f.ppf(1 - .05, df_reg, df_err)
    # print(f'F-test is {F}, p: {f_pval}')

    # R-squared
    R2 = r2_score(y_true=y_true, y_pred=y_pred,)
    R2b = SSR / SST
    # print(f'R-squared is {R2}')

    if verbose:
        print(f'{target}, {FEAT_SEL}: R2 {np.round(R2, 2)}, F-stat '
              f'{np.round(F, 1)}, p = {np.round(f_pval, 4)}')

    return y_true, y_pred, R2, F



In [None]:


for TARGET, FT_SEL in product(
    ['brady', 'tremor', 'gait'],
    ['single', 'multi']
):

    (y_true, y_pred, R2, f_test) = get_holdout_results( 
        traindf=traindf,
        target=TARGET,
        testdf=testdf,
        FEAT_SEL=FT_SEL,
    )


In [116]:
target = 'updrs_sum'
FT_SEL = 'multi'

(y_true, y_pred, R2, f_test) = get_holdout_results( 
    traindf=traindf,
    target=target,
    testdf=testdf,
    FEAT_SEL=FT_SEL,
)

true_stats = {'r2': R2, 'f': f_test}

Permutation

In [115]:
N_PERM = 500
target = 'updrs_sum'
FT_SEL = 'multi'

perm_stats = {'r2': [], 'f': []}


for i_perm in np.arange(N_PERM):
    # print(i_perm)
    (_, _, r2temp, ftemp) = get_holdout_results( 
        traindf=traindf,
        target=target,
        testdf=testdf,
        FEAT_SEL=FT_SEL,
        SHUFFLE=True,
        PERM_SEED=i_perm,
    )
    perm_stats['r2'].append(r2temp)
    perm_stats['f'].append(ftemp)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [None]:
# Plot results

fig, ax = plt.subplots(1, 1, figsize=(5,5))

ax.scatter(y_true, y_pred)

ax.set_xlabel('true change (points)', size=14,)
ax.set_ylabel('predicted change (points)', size=14,)
ax.set_title('Intra-individual UPDRS-fluctuations\n(brady+tremor+gait)',
             size=14,)

ax.tick_params(axis='both', size=14, labelsize=14,)
ax.spines[['right', 'top']].set_visible(False)

# ax.legend(frameon=False, fontsize=14,
#                bbox_to_anchor=(.95, .5), loc='center left')

plt.tight_layout()

plt.savefig(os.path.join(load_utils.get_onedrive_path('figures'),
             'ema_updrs_corr', 'holdoutval_updrsSum'),
             dpi=300, facecolor='w',)

plt.show()

In [122]:
### plot significnaces from permutations

fig, axes = plt.subplots(1, 2, figsize=(8, 3))


for i_ax, metr in enumerate(list(perm_stats.keys())):

    axes[i_ax].hist(perm_stats[metr], color='gray', alpha=.5,)
    axes[i_ax].axvline(np.percentile(perm_stats[metr], 95),
                       color='orange', alpha=.8, lw=3,
                       label='permuted\nalpha 0.05',)
    
    axes[i_ax].axvline(true_stats[metr],
                       color='purple', alpha=.5, lw=1,
                       label='prediction',)
    
    p_calc = sum(np.array(perm_stats[metr]) > true_stats[metr]) / len(perm_stats[metr])
    print(f'metric {metr}: p = {np.round(p_calc, 3)}')

    axes[i_ax].set_xlabel(f'{metr} score', size=14,)

    axes[i_ax].set_ylabel('count (n)', size=14)

    axes[i_ax].tick_params(axis='both', size=14, labelsize=14,)
    axes[i_ax].spines[['right', 'top']].set_visible(False)

axes[1].legend(frameon=False, fontsize=14,
               bbox_to_anchor=(.95, .5), loc='center left')

plt.tight_layout()

plt.savefig(os.path.join(load_utils.get_onedrive_path('figures'),
             'ema_updrs_corr', f'holdOut_updrsSum_{N_PERM}permStats'),
             dpi=300, facecolor='w',)

plt.close()

metric r2: p = 0.0
metric f: p = 0.0


In [None]:
# PLOT AS HEATMAP
HOP = 4

allvalues = [int(v) for v in y_true.values] + [int(v) for v in y_pred]
# Ensure labels are sorted and complete
labels = np.arange(np.min(allvalues) // HOP, np.max(allvalues) // HOP)

# Compute confusion matrix
plot_true = y_true // HOP
plot_pred = y_pred // HOP
cm = confusion_matrix(plot_true, plot_pred, labels=labels)

fig, ax = plt.subplots(figsize=(4, 4))

im = ax.imshow(cm, cmap="viridis", vmax=1,)

# # Add numbers inside cells
# for i in range(cm.shape[0]):
#     for j in range(cm.shape[1]):
#         ax.text(j, i, cm[i, j], ha="center", va="center", color="white")

# Axes labels
ax.set_xlabel("Predicted")
ax.set_ylabel("True")

ax.set_xticks(np.arange(len(labels)))
ax.set_yticks(np.arange(len(labels)))
ax.set_xticklabels([l * HOP for l in labels])
ax.set_yticklabels([l * HOP for l in labels])

# Add colorbar
cbar = fig.colorbar(im)
cbar.set_label("Count")

plt.tight_layout()
plt.show()

### LFP analysis (not included)

to do's:
- double check "rest" task is not excluding data
- include stim-amplitude data rows to double s0 vs s1



In [None]:
importlib.reload(load_utils)

In [None]:

ids = load_data.get_ids()

SKIP_LFPs = {
    'ema03': ['m0s1'],
    'ema07': ['m1s0', 'm1s1'],  # no m1 done: always ['m1s0', 'm1s1']
    'ema09': ['m1s0', 'm1s1'],  # no m1 done: always ['m1s0', 'm1s1']
    'ema10': ['m1s0', 'm1s1'],  # no m1 done: always ['m1s0', 'm1s1']
    'ema12': ['m1s0', 'm1s1'],  # no m1 done: always ['m1s0', 'm1s1']
    'ema14': 'all',  # no m1 done, m0s1 not found in motherfolder
    # 'ema14': ['m1s0', 'm1s1', 'm0s1'],  # ONLY m0s0; EXCLUDE?!
    'ema15': ['m1s0', 'm1s1'],  # no m1 done: always ['m1s0', 'm1s1']
    'ema16': ['m1s0', 'm1s1']  # ONLY m0s0; EXCLUDE?!
    # 'ema16': 'all'  # no m1 done: always ['m1s0', 'm1s1']
}

lfp_data = {}

for ema_id, COND in product(ids.index,
                            ['m0s0', 'm0s1', 'm1s0', 'm1s1']):
    if ema_id in SKIP_LFPs.keys():
        if COND in SKIP_LFPs[ema_id] or SKIP_LFPs[ema_id] == 'all':
            print(f'\n#### SKIP {ema_id} {COND}, not percept ready ####\n')
            continue

    sub = ids.loc[ema_id]['prc_id']
    ses = ids.loc[ema_id]['prc_ses']

    print(f'\nGET LFP {ema_id}, {sub}, {ses}, {COND}')

    # load session that corresponds to current selection
    ### TODO: 'REST' is hardcoded currently, check for issues with task like rest&tap
    sub_data = main_class.PerceiveData(
        sub = sub, 
        incl_modalities=['streaming'],
        incl_session = [ses],
        incl_condition =[COND,],
        incl_task = ["rest"],
        import_json=False, # for addtionally loading the corresponding JSON files as source files, set to True
        warn_for_metaNaNs=True, # True will give you a warning with rows from the metadata table with NaNs. Make sure you have filled out all columns of the file you want to load.
        allow_NaNs_in_metadata=True,
    )

    dat = getattr(sub_data.streaming, ses)
    # only include first two data rows (left and right STN signal)
    dat = getattr(dat, COND).rest.run1.data.get_data()[:2, :]
    ### TODO: include stimulation amplitude data streams to double check whether s0 vs s1 is correct
    lfp_data[f'{ema_id}_{COND}'] = dat

In [None]:
# FIG_PATH = os.path.join(os.path.dirname(os.getcwd()), 'figures', 'lfp_preprocess')
FIG_PATH = load_utils.get_onedrive_path('emaval_fig')
print(f'CHECK FIG_PATH: {FIG_PATH}, exists? -> {os.path.exists(FIG_PATH)}')

In [None]:
def plot_single_lfp_preprocess(
    DAT,
    SUB = 'emaXX',
    COND = 'm0s0',
    N_STD_OUTLIER = 3,
    LOWPASS = 2,
    HIGHPASS = 45,
    SFREQ=250,
    SHOWPLOTS=False,
    SAVEPLOTS=True,
):
    lfp_times = prep_data.get_lfp_times()


    fig, axes = plt.subplots(2, 2)
    for i, (arr, side) in enumerate(
        zip(DAT[f'{SUB}_{COND}'][:2], ['left', 'right'])
    ):
        arr = arr.copy()  # do not overwrite original dict data

        if ids.loc[SUB]['prc_id'] in lfp_times.keys():
            t_start, t_end = lfp_times[ids.loc[SUB]['prc_id']][COND]['rest']
            i_start, i_end = (t_start * 250, t_end * 250)
            arr = arr[i_start:i_end]

        ### plot raw signal
        axes[0, i].plot(arr, color='blue', alpha=.3, label='raw filtered',)

        ### handle outliers
        sel = np.logical_or(arr > (N_STD_OUTLIER * np.std(arr)),
                            arr < (-N_STD_OUTLIER * np.std(arr)))
        # arr[sel] = np.nan  # replace outliers with NaNs
        arr = arr[~sel]  # drop outliers
        
        ### plot resulting arr
        axes[0, i].plot(arr, color='blue', label='cleaned',)
        axes[0, i].set_title(f'{SUB} {COND} {side} STN', weight='bold')
        axes[0, i].set_ylabel(f'{side}-STN activity (yVolt)')
        xticks = np.arange(0, len(arr), 250 * 60)
        axes[0, i].set_xticks(xticks)
        axes[0, i].set_xticklabels(np.arange(len(xticks)))
        axes[0, i].set_xlabel('Time (minutes)')
        axes[0, i].set_ylim(-50, 50)
        # axes[0, i].legend(loc='upper right', frameon=False,)  # legend

        ### plot PSD
        arr = prep_data.lfp_filter(signal=arr, low=LOWPASS, high=HIGHPASS,)
        f, psx = welch(arr, fs=SFREQ,)
        axes[1, i].plot(f, psx)
        axes[1, i].set_ylabel(f'{side}-STN Power (a.u.)')
        axes[1, i].set_xlim(0, 45)
        axes[1, i].set_xlabel('Freq (Hz)')

    plt.tight_layout()

    if SAVEPLOTS:
        plt.savefig(os.path.join(FIG_PATH, 'lfp_preprocess', f'PSD_check_{SUB}_{COND}'),
                    facecolor='w', dpi=150,)
    if SHOWPLOTS: plt.show()
    else: plt.close()

CHECK missing LFP sessions

check motherfolder:
- ema16, sub105: too many runs? UPDRS tasks? 3 rest m0s0, 2 rest m0s1?
- ema14: only m0s0, leave out only one state

In [None]:
lfp_done = np.unique([k.split('_')[0] for k in lfp_data.keys()])

lfp_todo = [s for s in ids.index if s not in lfp_done]

print(lfp_todo)



In [None]:
for sub in lfp_todo:

    print(f'\n{sub}  -> sub-{ids.loc[sub]["prc_id"]} @ {ids.loc[sub]["prc_ses"]}')
    for COND in ['m0s0', 'm0s1', 'm1s0', 'm1s1']:
        print(f'\t{COND}')
        sub_data = main_class.PerceiveData(
            sub = ids.loc[sub]['prc_id'],
            incl_modalities=['streaming'],
            incl_session = [ids.loc[sub]['prc_ses']],
            incl_condition =[COND,],
            incl_task = ["rest"],
            import_json=False, # for addtionally loading the corresponding JSON files as source files, set to True
            warn_for_metaNaNs=True, # True will give you a warning with rows from the metadata table with NaNs. Make sure you have filled out all columns of the file you want to load.
            allow_NaNs_in_metadata=True,
        )

#### Select relevant ephys epochs based on task timings

In [None]:
lfp_times = prep_data.get_lfp_times()
ids = load_data.get_ids()


In [None]:
Fs = 250
sub = 'ema01'
con = 'm0s0'
lfp_sub = ids.loc[sub]['prc_id']

rest_times = lfp_times[lfp_sub][con]['rest']
rest_samples = [rest_times[0] * Fs, rest_times[1] * Fs]

plt.plot(lfp_data[f'{sub}_{con}'][0][rest_samples[0]:rest_samples[1]])

### TODO:
# check if all seconds for available data is working
# correct 'rest' tasks if troublesome i.e. rest&tap
# check s0 and s1 versus stim-ampltidude time series
# plot individual PSDs
# calculate beta-powers X UPDRS correlations
# draft if and if so, how to include movement parts?

#### Plot and save spectral preprocessing

In [None]:
lfp_subs = np.unique([k.split('_')[0] for k in lfp_data.keys()])

# lfp_subs = ['ema01', 'ema08']

for SUB, COND in product(lfp_subs, ['m0s0', 'm0s1', 'm1s0', 'm1s1']):

    print(f'\n### {SUB}, {COND}')
    if f'{SUB}_{COND}' not in lfp_data.keys():
        print(f'...skip {SUB}, {COND}')
        continue

    plot_single_lfp_preprocess(SUB=SUB, COND=COND, DAT=lfp_data,
                               N_STD_OUTLIER=6,
                               SHOWPLOTS=False, SAVEPLOTS=True,)
