# Examples on how to access prepared windows and features

### 0) Load packages and functions

In [None]:
# Importing Python and external packages
import os
import sys
import json
import importlib
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt


In [None]:
def get_project_path_in_notebook(
    subfolder=False,
):
    """
    Find the path of the project folder from within a notebook.

    Walks upwards from the current working directory until a
    path ending with 'dyskinesia_neurophys' is reached. Run this
    once at the start of a notebook so that other project
    modules/functions can be found.

    Arguments:
        - subfolder (str or False): optional name of a folder
            inside the project folder (e.g. 'code'). It is
            appended to the returned path only if it exists;
            otherwise the project folder itself is returned.
            Non-string values are ignored.

    Returns:
        - path (str): project folder, or the requested subfolder

    Raises:
        - FileNotFoundError: if neither the current working
            directory nor any of its parents is the project
            folder (this used to loop forever at the root)
    """
    project_name = 'dyskinesia_neurophys'
    start_path = os.getcwd()
    path = start_path

    while not path.endswith(project_name):
        parent = os.path.dirname(path)
        # os.path.dirname() returns its input unchanged at the
        # filesystem root, so no further progress is possible
        if parent == path:
            raise FileNotFoundError(
                f'project folder "{project_name}" not found '
                f'in or above {start_path}'
            )
        path = parent

    if isinstance(subfolder, str):
        sub_path = os.path.join(path, subfolder)
        if os.path.exists(sub_path):
            path = sub_path

    return path

In [None]:
# Locate the project's 'code' folder and make it the working directory
# before the project-local modules below are imported.
codepath = get_project_path_in_notebook(subfolder='code')
os.chdir(codepath)
print(codepath)

# own functions
import utils.utils_fileManagement as utilsFiles
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_analysis.load_SSD_features as load_ssdFts
from lfpecog_features.get_ssd_data import get_subject_SSDs
import lfpecog_predict.prepare_predict_arrays as prep_pred_arrs
import lfpecog_analysis.get_acc_task_derivs as accDerivs
import lfpecog_analysis.stats_fts_lid_corrs as ft_stats

### 0) Define settings

In [None]:
# --- user and data/feature versions ---
USER = 'jeroen'

# NOTE(review): the original remark here read "ft v6 (data v4.0) is most
# recent, WITHOUT TAP-MOVEMENT EXCLUSION", while FT_VERSION is set to
# 'v8' -- confirm which feature version that remark applies to.
# DATA_VERSION v4.0: new artef-rem, no reref; v3.0 multiple re-ref
DATA_VERSION, FT_VERSION = 'v4.0', 'v8'
# INCL_PSD_FTS=['mean_psd', 'variation']

# subjects to leave out: aborted protocol, poor quality
IGNORE_PTS = ['011', '104', '106']

# --- clinical label (CDRS) settings ---
CDRS_RATER = 'Patricia'
ANALYSIS_SIDE = 'BILAT'
INCL_CORE_CDRS, CATEGORICAL_CDRS = True, False

# cut-offs for dyskinesia categorization (mild, severe)
MILD_CDRS, SEV_CDRS = 4, 8

In [None]:
# reload the file-management helpers so recent edits are picked up
importlib.reload(utilsFiles)

# get all available subs with features
SUBS = utilsFiles.get_avail_ssd_subs(
    USER=USER,
    DATA_VERSION=DATA_VERSION,
    FT_VERSION=FT_VERSION,
    IGNORE_PTS=IGNORE_PTS,
)
print(
    f'SUBS: n={len(SUBS)} '
    f'({SUBS})'
)  # should be 21


## 1) Import neural features and clinical labels

- For predictions with ECoG-depending features: exclude moments of UNILATERAL dyskinesia IPSILATERAL to ECoG hemisphere

Get DataClass containing FEATS and CDRS-LABELS

In [None]:
# # CREATE FeatureClass containing all features
# importlib.reload(utilsFiles)
# importlib.reload(accDerivs)
# importlib.reload(ftProc)
# importlib.reload(load_ssdFts)


# FT_VERSION = 'v8'
# INCL_CORE_CDRS = True
# CATEG_CDRS = False  # if False, full CDRS spectrum is used
# MILD_CDRS = 3.5  # cut off for dyskinesia categorization
# SEV_CDRS = 7.5  # cut off for dyskinesia categorization

# FeatLid = ftProc.FeatLidClass(
#     FT_VERSION=FT_VERSION,
#     CDRS_RATER='Patricia',
#     INCL_ECOG=False,
#     INCL_ACC_RMS=True,
#     CATEGORICAL_CDRS=CATEG_CDRS,
#     CORR_TARGET='CDRS',  # target for correlation dataframe (not relevant)
#     cutMild=MILD_CDRS, cutSevere=SEV_CDRS,
#     TO_CALC_CORR=False,
# )


In [None]:
# # SAVE FeatLabelClass as pickle

# featLabPath = os.path.join(utilsFiles.get_project_path('data'),
#                            'prediction_data',
#                            'featLabelClasses')
# className = f'featLabels_inclAcc_ft{FT_VERSION}'
# if FeatLid.CORR_TARGET == 'LID': className += '_Lid'
# elif FeatLid.CATEGORICAL_CDRS == True: className += '_CatCdrs'
# else: className += '_Cdrs'

# if FeatLid.INCL_ECOG: className += '_Ecog'
# else: className += '_StnOnly'

# utilsFiles.save_class_pickle(class_to_save=FeatLid,
#                              path=featLabPath,
#                              filename=className)

In [None]:
# reload the stats helpers so recent edits are picked up
importlib.reload(ft_stats)

# LOAD existing classes with features and labels
INCL_ACC = True
INCL_ECOG = False  # if True, STN-only patients are NOT included
MILD_CDRS = 4  # cut off for dyskinesia categorization
SEV_CDRS = 8  # cut off for dyskinesia categorization

ecog_ext = '_Ecog' if INCL_ECOG else '_StnOnly'

# define path where pickle is saved; resolved once, for the active USER
featLabPath = os.path.join(
    utilsFiles.get_project_path('data', USER=USER),
    'prediction_data', 'featLabelClasses'
)
# the file name carries the feature version and the ECoG/STN-only tag,
# plus the '_inclAcc' tag when INCL_ACC is set
name_prefix = 'featLabels_inclAcc' if INCL_ACC else 'featLabels'
filename = f'{name_prefix}_ft{FT_VERSION}_Cdrs{ecog_ext}.P'
predData = utilsFiles.load_class_pickle(
    os.path.join(featLabPath, filename),
    convert_float_np64=True
)
# MERGE GAMMA1-2-3 FEATURES
# (values of existing keys are replaced, so the dict size is unchanged
# while iterating)
for sub in predData.FEATS:
    predData.FEATS[sub] = ft_stats.replace_gammas_for_meanGamma(predData.FEATS[sub])


In [None]:
# # Load v6 to get ACC-values

# featLabPath = os.path.join(utilsFiles.get_project_path('data'),
#                            'prediction_data',
#                            'featLabelClasses')

# INCL_ECOG_acc = INCL_ECOG  # if True, STN-only patients are NOT included

# if INCL_ECOG_acc: ecog_acc = '_Ecog'
# else: ecog_acc = '_StnOnly'

# AccData = utilsFiles.load_class_pickle(
#     os.path.join(featLabPath,
#                  f'featLabels_ftv6_'
#                  f'Cdrs{ecog_acc}.P'),
#     convert_float_np64=True
# )

#### Explore data

- Subject info
    - 0XX: subjects with ECoG
    - 1XX: subjects WITHOUT ECoG

- Class info
    - stored in class FeatLidClass()
    - class definition in lfpecog_analysis.ft_processing_helpers
    - 

- FEATS: feature dict, sorted on subject
    - FEATS['022'] contains dataframe with features per 10-sec window, 50% overlap
    - FEATS['022'].index are timestamps for windows
    - FEATS['022'].keys() are feature names
    -

- FT_LABELS: dyskinesia-label dict, sorted on subject
    - FT_LABELS['022'] contains array with dyskinesia labels, corresponding to FEATS
    - contains full CDRS scales
    - 

- ACC_RMS: mean (L + R) 10-sec (z-scored) root mean squares of vector magnitude of acc-data
    - ACC_RMS['022'] contains array with RMS values, corresponding to FEATS
    - average rms values are z-scored within every patient, movement cut off was -0.5 



In [None]:
# Class attributes: show the names of all attributes stored on predData
print(vars(predData).keys())

In [None]:
# example subject used for the print-outs below
ex_sub = '022'

# FEATURE dict: one entry per subject
print('feat dict FEATS, sorted by patient:')
print(
    f'sub {ex_sub} FEATS df SHAPE: '
    f'{predData.FEATS[ex_sub].shape}'
)
print(
    'Window timestamps: '
    f'{predData.FEATS[ex_sub].index[:5]}'
)
print(
    'Feature names: '
    f'{predData.FEATS[ex_sub].keys()[:5]}'
)
# predData.FEATS['022']  # dataframe with features, index = dopa_time in minutes

# full list of feature names
print(
    'Feature names: '
    f'{predData.FEATS[ex_sub].keys()}'
)


In [None]:
# Dyskinesia label dict: one entry per subject
print('feat dict FT_LABELS, sorted by patient:')
print(predData.FT_LABELS.keys())
print(
    f'sub {ex_sub} FT_LABELS arr SHAPE: '
    f'{predData.FT_LABELS[ex_sub].shape}'
)

# take a copy of the example subject's labels
orig_cdrs = predData.FT_LABELS[ex_sub].copy()

# to convert to categories; categorized outcome:
#   0: none, 1: mild, 2: moderate, 3: severe
cat_cdrs = ftProc.categorical_CDRS(
    orig_cdrs,
    preLID_separate=False,
    preLID_minutes=0,
    cutoff_mildModerate=MILD_CDRS,
    cutoff_moderateSevere=SEV_CDRS,
)


In [None]:
# # Check RMS-ACC-v6 with FEATS-v8

# for sub in predData.FEATS.keys():
# # print(f'feat dict ACC_RMS, sorted by patient:')
# # print(AccData.ACC_RMS.keys())
# # print(f'sub {ex_sub} FT_LABELS arr SHAPE: {AccData.ACC_RMS[ex_sub].shape}')
# # rms_sub = AccData.ACC_RMS[ex_sub].copy()

#     print(f'sub-{sub} FEATS  SHAPE: {predData.FEATS[sub].shape}')
#     print(f'sub-{sub} ACC arr SHAPE: {AccData.ACC_RMS[sub].shape}')

#     plt.plot(AccData.ACC_RMS[sub], alpha=.3,)

# plt.show()



#### create prediction arrays based on imported features and labels

- creates:
    - X (features),
    - y (labels),
    - sub_ids.
- does not create:
    - parallel movement (RMS) yet (can be done parallel to y creation)

In [None]:
# acc_rms = []

# for sub in predData.FEATS.keys():
#     acc_rms.extend(AccData.ACC_RMS[sub])

# acc_rms = np.array(acc_rms)
# print(acc_rms.shape)

In [None]:
# reload the array-preparation helpers so recent edits are picked up
importlib.reload(prep_pred_arrs)

# Step 1: create arrays per subject based on features, labels and ACC-RMS
(
    X_total,
    y_total_binary,
    y_total_scale,
    sub_ids_total,
    ft_times_total,
    ft_names,
    acc_total,
) = prep_pred_arrs.get_group_arrays_for_prediction(
    feat_dict=predData.FEATS,
    label_dict=predData.FT_LABELS,
    acc_dict=predData.ACC_RMS,
    TO_PLOT=False,
)
print(f'group arrays made (n={len(X_total)}), to do: merging...')

# Step 2: merge subject-arrays to one group array for prediction
(
    X_all,
    y_all_binary,
    y_all_scale,
    sub_ids,
    ft_times_all,
    acc_rms,
) = prep_pred_arrs.merge_group_arrays(
    X_total=X_total,
    y_total_binary=y_total_binary,
    y_total_scale=y_total_scale,
    sub_ids_total=sub_ids_total,
    ft_times_total=ft_times_total,
    ext_acc_arr=acc_total,  # False or array acc_rms
)
print('group arrays merged')

# Step 3: add categorical CDRS labels
y_all_categ = ftProc.categorical_CDRS(
    y_all_scale,
    preLID_separate=False,
    preLID_minutes=0,
    cutoff_mildModerate=MILD_CDRS,
    cutoff_moderateSevere=SEV_CDRS,
)

print(
    f'Subjects (n={len(np.unique(sub_ids))}) '
    f'included: {np.unique(sub_ids)}'
)

In [None]:
import pickle

In [None]:
###### Save FEATS for Timon

# bundle the merged prediction arrays under descriptive keys
dict_data = dict(
    X_all=X_all,
    y_all_binary=y_all_binary,
    y_all_categ=y_all_categ,
    y_all_scale=y_all_scale,
    sub_ids=sub_ids,
    ft_times_all=ft_times_all,
    ft_names=ft_names,
    ACC_RMS=acc_rms,
)

# NOTE(review): the file name hard-codes 'STN' and 'v8'; confirm it still
# matches INCL_ECOG / FT_VERSION before re-running with other settings.
with open(
    os.path.join(
        utilsFiles.get_project_path('data'),
        'prediction_data',
        "X_cross_val_data_STN_acc_v8.pickle",
    ),
    "wb",
) as f:
    pickle.dump(dict_data, f)

# Save ACC Movement arrays

##### Prepare ACC RMS full array


# dict_acc = {"ACC_RMS" : acc_rms,
#             "sub_ids" : sub_ids,
#             "ft_times_all" : ft_times_all
#             }
 
# with open(
#     os.path.join(utilsFiles.get_project_path('data'),
#                  'prediction_data',
#                  "ACC_dataPlus_STN.pickle"),
#     "wb"
# ) as f:
#     pickle.dump(dict_acc, f)

## 2) Import epoched raw- or SSDed-data



#### A) SSD import

- SSD Class info
    - stored in class get_subject_SSDs(), (defined in lfpecog_features.get_ssd_data)
    - contains lfp_left, lfp_right (ecog_right/ecog_left): class SSD_bands_windowed()

    - lfp_left, lfp_right, ecog_right
        - stored in class SSD_bands_windowed(), (defined in lfpecog_features.get_ssd_data)
        - contains:
            - times: list of start-window times for all timeseries-array rows
            - fs (Hz)
            - settings (dict) with all extraction settings
            - delta (IS THETA!!)
            - alpha
            - lo_beta
            - hi_beta
            - gamma1
            - gamma2
            - gamma3

    - e.g. lo_beta is array with timeseries: windows x samples (720, 20480)

In [None]:
import lfpecog_features.get_ssd_data as ssd
import lfpecog_analysis.get_SSD_timefreqs as ssd_TimeFreq
import lfpecog_plotting.plot_timeFreqs_ssd_psds as plot_ssd_TFs

In [None]:
# reload the SSD helper module so recent edits are picked up
importlib.reload(ssd)

# import dataclass containing SSD data for the example subject
ex_sub = '022'
ssdSub = ssd.get_subject_SSDs(
    sub=ex_sub,
    incl_stn=True,
    incl_ecog=True,
    USER=USER,
    ft_setting_fname=f'ftExtr_spectral_{FT_VERSION}.json',
)


In [None]:
# shape of the window-time list stored for the right ECoG data
print(
    'length of timestamps: '
    f'{np.array(ssdSub.ecog_right.times).shape}'
)
print(
    'shape of ALPHA timeseries (windows x samples): '
    f'{ssdSub.ecog_right.alpha.shape}'
)
# settings contains (dict) all feature extraction settings
print(
    f'sampling freq: {ssdSub.ecog_right.fs}, '
    f'window length: {ssdSub.ecog_right.settings["WIN_LEN_sec"]} sec'
)


#### B) Raw windows import

- windowed Class info
    - stores ONE DATATYPE (LFP L/R / ECOG), for ONE subject
    - stored in class windowedData(), (defined in utils.utils_windowing)
    - contains:
        - data: array, 3-dimens.: n-windows, n-samples, n-channels
            NOTE: WINDOWS CAN STILL CONTAIN NANs
        - fs: int
        - keys: list (n-channels), contains dopa_time, ephys-channels,
            task, and movement derivatives
        - win_starttimes: list of winstart-times IN SECONDS (dopa_time)
        

In [None]:
# reload the file-management helpers so recent edits are picked up
importlib.reload(utilsFiles)

USER = 'jeroen'
# NOTE(review): this overrides the FT_VERSION ('v8') set in the settings
# cell near the top of the notebook -- confirm that 'v6' is intended here.
FT_VERSION = 'v6'

# load ft extraction settings
SETTINGS = utilsFiles.load_ft_ext_cfg(
    USER=USER,
    FT_VERSION=FT_VERSION,
)

# get available subs
SUBS = SETTINGS['TOTAL_SUBS']

# data sources to consider, and whether stored windows may be used
ephys_sources = ['lfp_right', 'lfp_left', 'ecog_left', 'ecog_right']
use_stored_windows = True

sub = '022'

# folder holding the windowed-data classes of this subject
windows_path = os.path.join(
    utilsFiles.get_project_path('data', USER=USER),
    f'windowed_data_classes_{SETTINGS["WIN_LEN_sec"]}s_'
    f'{SETTINGS["WIN_OVERLAP_part"]}overlap',
    SETTINGS['DATA_VERSION'],
    f'sub-{sub}',
)

In [None]:
# Load the stored windowed-data class per data type into `windows`,
# keyed by data type.
windows = {}
# only the first entry of ephys_sources is loaded in this example
for dType in ephys_sources[:1]:
    print(f'\tstart {dType}')
    # define path for windows of dType
    dType_fname = (f'sub-{sub}_windows_'
                   f'{SETTINGS["WIN_LEN_sec"]}s_'
                   f'{SETTINGS["DATA_VERSION"]}_{dType}.P')
    dType_win_path = os.path.join(windows_path, dType_fname)

    # check if windows are already available; plain `and` short-circuits,
    # so the file system is only queried when stored windows are requested
    if use_stored_windows and os.path.exists(dType_win_path):
        print(f'load data from {windows_path}....')
        wins = utilsFiles.load_class_pickle(dType_win_path)
        print(f'\tWINDOWS LOADED from {dType_fname} in {windows_path}')

        # add to dict
        windows[dType] = wins
    else:
        # also reached when use_stored_windows is False
        print(f'data not found for sub-{sub}, {dType}')

Load matching Dyskinesia scores to time array

In [None]:

sub = '022'
# times of interest (in seconds)
time_arr = np.array(windows['lfp_right'].win_starttimes)

# Option 1: get dyskinesia scores CONTRALATERAL to ECoG
# note that function needs time in MINUTES
# (stored under its own name so it is not overwritten by option 2)
sub_cdrs_contralat = ftProc.find_select_nearest_CDRS_for_ephys(
    sub=sub,
    ft_times=time_arr / 60,  # convert to minutes
    side='contralat ecog',
    INCL_CORE_CDRS=False,
    cdrs_rater='Jeroen',
)

# Option 2: get dyskinesia scores BILATERAL SUM incl. CORE
# note that function needs time in MINUTES
# (`sub_cdrs` keeps holding this bilateral result, as before)
sub_cdrs = ftProc.find_select_nearest_CDRS_for_ephys(
    sub=sub,
    ft_times=time_arr / 60,  # convert to minutes
    side='bilat',
    INCL_CORE_CDRS=True,
    cdrs_rater='Jeroen',
)

In [None]:
# example: inspect the loaded right-LFP windows. They are taken from the
# `windows` dict (the same source the time array was built from) instead
# of the leftover loop variable `wins`, which may be undefined or stale.
lfp_wins = windows['lfp_right']
print(f'data shape: {lfp_wins.data.shape} (sfreq: {lfp_wins.fs} Hz)')
print(f'corresponding times of window start: {len(lfp_wins.win_starttimes)}')
print(f'corresponding channel keys: {len(lfp_wins.keys)}')

print(f'corresponding dyskinesia scores: {len(sub_cdrs)}')