# Examples on how to access prepared windows and features

### 0) Load packages and functions

In [1]:
# Importing Python and external packages
import os
import sys
import importlib
import json
import csv
from dataclasses import dataclass, field, fields
from itertools import compress
import pandas as pd
import numpy as np
from itertools import product

import matplotlib.pyplot as plt


In [4]:
def get_project_path_in_notebook(
    subfolder=False,
):
    """
    Finds path of projectfolder from Notebook.
    Start running this once to correctly find
    other modules/functions
    """
    path = os.getcwd()

    while path[-20:] != 'dyskinesia_neurophys':

        path = os.path.dirname(path)
    
    if isinstance(subfolder, str):
        if os.path.exists(os.path.join(path,
                                       subfolder)):
            path = os.path.join(path, subfolder)
    
    return path

In [9]:
# define local storage directories
codepath = get_project_path_in_notebook('code')
os.chdir(codepath)

# own utility functions
import utils.utils_fileManagement as utilsFiles
# own data exploration functions
import lfpecog_features.feats_read_proc_data as read_data
import lfpecog_preproc.preproc_import_scores_annotations as importClin
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_analysis.import_ephys_results as importResults
import lfpecog_analysis.stats_fts_lid_corrs as ftLidCorr
import lfpecog_analysis.load_SSD_features as load_ssdFts
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_features.feats_helper_funcs as ftHelp
from lfpecog_features.get_ssd_data import get_subject_SSDs
import lfpecog_predict.prepare_predict_arrays as prep_pred_arrs

from lfpecog_plotting.plotHelpers import get_colors
import lfpecog_plotting.plotHelpers as pltHelp
import lfpecog_plotting.plot_FreqCorr as plotFtCorrs
import lfpecog_plotting.plot_SSD_feat_descriptives as plot_ssd_descr

### 0) Define settings

In [33]:
# ft v6 (data v4.0) is most recent, WITHOUT TAP-MOVEMENT EXCLUSION
DATA_VERSION = 'v4.0'    # v4.0: new artef-rem, no reref; v3.0 multiple re-ref
FT_VERSION = 'v6'
INCL_PSD_FTS=['mean_psd', 'variation']
IGNORE_PTS = ['011', '104', '106']  # aborted protocol, poor quality

CDRS_RATER = 'Jeroen'
ANALYSIS_SIDE = 'BILAT'
INCL_CORE_CDRS = True
CATEGORICAL_CDRS = False

MILD_CDRS = 5  # cut off for dyskinesia categorization
SEV_CDRS = 10  # cut off for dyskinesia categorization

In [11]:
# get all available subs with features
SUBS = utilsFiles.get_avail_ssd_subs(DATA_VERSION=DATA_VERSION,
                                     FT_VERSION=FT_VERSION,
                                     IGNORE_PTS=IGNORE_PTS)
print(f'SUBS: n={len(SUBS)} ({SUBS})')  # should be 21


SUBS: n=21 (['110', '101', '016', '023', '017', '022', '012', '008', '103', '013', '108', '014', '105', '102', '019', '010', '009', '021', '020', '107', '109'])


## 1) Import neural features and clinical labels

- For predictions with ECoG-depending features: exclude moments of UNILATERAL dyskinesia IPSILATERAL to ECoG hemisphere

In [None]:
# importlib.reload(load_ssdFts)

# # use as single ft example to debug/develop
# sub_fts = load_ssdFts.ssdFeatures(
#     sub_list=['023'],
#     settings_json=f'ftExtr_spectral_{FT_VERSION}.json',
#     data_version=DATA_VERSION,
# )

Get DataClass containing FEATS and CDRS-LABELS

In [None]:
import lfpecog_analysis.get_acc_task_derivs as accDerivs

In [None]:
# # CREATE FeatureClass containing all features
# importlib.reload(utilsFiles)
# importlib.reload(accDerivs)
# importlib.reload(ftProc)
# importlib.reload(importClin)
# importlib.reload(load_ssdFts)
# importlib.reload(ftLidCorr)


# FT_VERSION = 'v6'
# INCL_CORE_CDRS = True
# CATEG_CDRS = False  # if False, full CDRS spectrum is used
# MILD_CDRS = 5  # cut off for dyskinesia categorization
# SEV_CDRS = 10  # cut off for dyskinesia categorization

# FeatLid = ftProc.FeatLidClass(
#     FT_VERSION=FT_VERSION,
#     INCL_ECOG=True,
#     INCL_ACC_RMS=True,
#     CATEGORICAL_CDRS=CATEG_CDRS,
#     CORR_TARGET='CDRS',  # target for correlation dataframe (not relevant)
#     cutMild=MILD_CDRS, cutSevere=SEV_CDRS,
#     TO_CALC_CORR=True,
# )


In [None]:
# # SAVE FeatLabelClass as pickle

# featLabPath = os.path.join(utilsFiles.get_project_path('data'),
#                            'prediction_data',
#                            'featLabelClasses')
# className = f'featLabels_ft{FT_VERSION}'
# if FeatLid.CORR_TARGET == 'LID': className += '_Lid'
# elif FeatLid.CATEGORICAL_CDRS == True: className += '_CatCdrs'
# else: className += '_Cdrs'

# if FeatLid.INCL_ECOG: className += '_Ecog'
# else: className += '_StnOnly'

# utilsFiles.save_class_pickle(class_to_save=FeatLid,
#                              path=featLabPath,
#                              filename=className)

In [None]:
import lfpecog_analysis.stats_fts_lid_corrs as ft_stats

In [103]:
# LOAD existing classes with features and labels
INCL_ECOG = True  # if True, STN-only patients are NOT included
MILD_CDRS = 4  # cut off for dyskinesia categorization
SEV_CDRS = 8  # cut off for dyskinesia categorization

if INCL_ECOG: ecog_ext = '_Ecog'
else: ecog_ext = '_StnOnly'

# define path where pickle is saved
featLabPath = os.path.join(
    utilsFiles.get_project_path('data'),
    'prediction_data', 'featLabelClasses'
)

predData = utilsFiles.load_class_pickle(
    os.path.join(featLabPath,
                 f'featLabels_ft{FT_VERSION}_'
                 f'Cdrs{ecog_ext}.P'),
    convert_float_np64=True
)
# MERGE GAMMA1-2-3 FEATURES
for sub in predData.FEATS:
    predData.FEATS[sub] = ft_stats.replace_gammas_for_maxGamma(predData.FEATS[sub])


... pickle loading: c:\Users\habetsj\Research\projects\dyskinesia_neurophys\data\prediction_data\featLabelClasses\featLabels_ftv6_Cdrs_Ecog.P


#### Explore data

- Subject info
    - 0XX: subjects with ECoG
    - 1XX: subject WITHOUT ECoG

- Class info
    - stored in class FeatLidClass()
    - class definition in lfpecog_analysis.ft_processing_helpers
    - 

- FEATS: feature dict, sorted on subject
    - FEATS['022] contains dataframe with features per 10-sec window, 50% overlap
    - FEATS['022].index are timestamps for windows
    - FEATS['022].keys() are feature names
    -

- FT_LABELS: dyskinesia-label dict, sorted on subject
    - FEATS['022] contains array with dyskinesia labels, corresponding to FEATS
    - contains full CDRS scales



In [90]:
# Class attributes
print(vars(predData).keys())



dict_keys(['FT_VERSION', 'CDRS_RATER', 'ANALYSIS_SIDE', 'INCL_STN', 'INCL_ECOG', 'EXCL_IPSI_ECOG', 'INCL_CORE_CDRS', 'INCL_PSD_FTS', 'INCL_ACC_RMS', 'IGNORE_PTS', 'CATEGORICAL_CDRS', 'cutMild', 'cutSevere', 'WIN_LEN_sec', 'WIN_OVERLAP_part', 'TO_CALC_CORR', 'CORR_TARGET', 'DATA_VERSION', 'SUBS', 'FEATS', 'FT_LABELS', 'ACC_RMS', 'corrs', 'stat_df'])


In [91]:
ex_sub = '022'
# FEATURE dict 
print(f'feat dict FEATS, sorted by patient:')
print(f'sub {ex_sub} FEATS df SHAPE: {predData.FEATS[ex_sub].shape}')
print(f'Window timestamps: {predData.FEATS[ex_sub].index[:5]}')
print(f'Feature names: {predData.FEATS[ex_sub].keys()[:5]}')
# predData.FEATS['022']  # dataframe with features, index = dopa_time in minutes



feat dict FEATS, sorted by patient:
sub 022 FEATS df SHAPE: (580, 70)
Window timestamps: Float64Index([-0.5833333333333334, -0.5, -0.4166666666666667,
              -0.3333333333333333, -0.25],
             dtype='float64')
Feature names: Index(['ecog_right_delta_mean_psd', 'ecog_right_delta_variation',
       'ecog_right_alpha_mean_psd', 'ecog_right_alpha_variation',
       'ecog_right_lo_beta_mean_psd'],
      dtype='object')


In [93]:
# Dyskinesia label dict
print(f'feat dict FT_LABELS, sorted by patient:')
print(predData.FT_LABELS.keys())
print(f'sub {ex_sub} FT_LABELS arr SHAPE: {predData.FT_LABELS[ex_sub].shape}')

orig_cdrs = predData.FT_LABELS[ex_sub].copy()

# to convert to categories
# categorized outcome:
# 0: none,
# 1: mild,
# 2: moderate,
# 3: severe
cat_cdrs = ftProc.categorical_CDRS(
    orig_cdrs, preLID_separate=False,
    preLID_minutes=0,
    cutoff_mildModerate=MILD_CDRS,
    cutoff_moderateSevere=SEV_CDRS
)


feat dict FT_LABELS, sorted by patient:
dict_keys(['012', '016', '019', '008', '013', '010', '022', '021', '017', '023', '009', '020', '014'])
sub 022 FT_LABELS arr SHAPE: (580,)


In [94]:
# RMS ACC movement dict
print(f'feat dict FT_LABELS, sorted by patient:')
print(predData.FT_LABELS.keys())
print(f'sub {ex_sub} FT_LABELS arr SHAPE: {predData.FT_LABELS[ex_sub].shape}')

orig_cdrs = predData.FT_LABELS[ex_sub].copy()

# to convert to categories
# categorized outcome:
# 0: none,
# 1: mild,
# 2: moderate,
# 3: severe
cat_cdrs = ftProc.categorical_CDRS(
    orig_cdrs, preLID_separate=False,
    preLID_minutes=0,
    cutoff_mildModerate=MILD_CDRS,
    cutoff_moderateSevere=SEV_CDRS
)

feat dict FT_LABELS, sorted by patient:
dict_keys(['012', '016', '019', '008', '013', '010', '022', '021', '017', '023', '009', '020', '014'])
sub 022 FT_LABELS arr SHAPE: (580,)


#### create prediction arrays based on imported features and labels

In [95]:
# Create arrays per subject based on features and labels
importlib.reload(prep_pred_arrs)

(X_total, y_total_binary,
 y_total_scale, sub_ids_total,
 ft_times_total, ft_names) = prep_pred_arrs.get_group_arrays_for_prediction(
    feat_dict=predData.FEATS,
    label_dict=predData.FT_LABELS,
    TO_PLOT = False)

# Merge subject-arrays to one group array for prediction
(X_all, y_all_binary,
 y_all_scale, sub_ids,
 ft_times_all) = prep_pred_arrs.merge_group_arrays(X_total=X_total,
                                    y_total_binary=y_total_binary,
                                    y_total_scale=y_total_scale,
                                    sub_ids_total=sub_ids_total,
                                    ft_times_total=ft_times_total)


print(f'Subjects (n={len(np.unique(sub_ids))}) included: {np.unique(sub_ids)}')

	for sub-012, added X-shape: (803, 70)
	for sub-016, added X-shape: (679, 70)
	for sub-019, added X-shape: (927, 70)
	for sub-008, added X-shape: (545, 70)
	for sub-013, added X-shape: (659, 70)
	for sub-010, added X-shape: (670, 70)
	for sub-022, added X-shape: (580, 70)
	for sub-021, added X-shape: (526, 70)
	for sub-017, added X-shape: (549, 70)
	for sub-023, added X-shape: (549, 70)
	for sub-009, added X-shape: (768, 70)
	for sub-020, added X-shape: (420, 70)
	for sub-014, added X-shape: (519, 70)
012 803
removed rows n=86
(8108, 70) (8108,) (8108,) (8108,) (8108,)
out of n=8108 samples, n=4282 are Dyskinesia (52.8 %)
Subjects (n=13) included: ['008' '009' '010' '012' '013' '014' '016' '017' '019' '020' '021' '022'
 '023']


## 2) Import epoched SSD-data

- Class info
    - stored in class get_subject_SSDs(), (defined in lfpecog_features.get_ssd_data)
    - contains lfp_left, lfp_right (ecog_right/ecog_left): class SSD_bands_windowed()

- lfp_left, lfp_right, ecog_right
    - stored in class SSD_bands_windowed(), (defined in lfpecog_features.get_ssd_data)
    - contains:
        - delta (IS THETA!!)
        - alpha
        - lo_beta
        - hi_beta
        - gamma1
        - gamma2
        - gamma3

- e.g. lo_beta is array with timeseries: windows x samples (720, 20480)

In [45]:
import lfpecog_features.get_ssd_data as ssd
import lfpecog_analysis.get_SSD_timefreqs as ssd_TimeFreq
import lfpecog_plotting.plot_timeFreqs_ssd_psds as plot_ssd_TFs

In [46]:
ex_sub = '022'
# import dataclass containing SSD data
ssdSub = ssd.get_subject_SSDs(sub=ex_sub,
                             incl_stn=True,
                             incl_ecog=True,
                             ft_setting_fname=f'ftExtr_spectral_{FT_VERSION}.json',
                            #  ft_setting_path='INSERT OWN PATH WITH SETTING JSON',
                             )


...loading windowed ssd c:\Users\habetsj\Research\projects\dyskinesia_neurophys\data\windowed_data_classes_10s_0.5overlap\v4.0\sub-022\broadSSD_windowedBands_022_ecog_right_v6.json c:\Users\habetsj\Research\projects\dyskinesia_neurophys\data\windowed_data_classes_10s_0.5overlap\v4.0\sub-022\broadSSD_windowedBands_022_ecog_right_v6.npy
	...loaded SSD windowed-data and meta-info for ecog_right of sub-022
...loading windowed ssd c:\Users\habetsj\Research\projects\dyskinesia_neurophys\data\windowed_data_classes_10s_0.5overlap\v4.0\sub-022\broadSSD_windowedBands_022_lfp_left_v6.json c:\Users\habetsj\Research\projects\dyskinesia_neurophys\data\windowed_data_classes_10s_0.5overlap\v4.0\sub-022\broadSSD_windowedBands_022_lfp_left_v6.npy
	...loaded SSD windowed-data and meta-info for lfp_left of sub-022
...loading windowed ssd c:\Users\habetsj\Research\projects\dyskinesia_neurophys\data\windowed_data_classes_10s_0.5overlap\v4.0\sub-022\broadSSD_windowedBands_022_lfp_right_v6.json c:\Users\habet

In [68]:
ssdSub.ecog_right.alpha.shape


(720, 20480)