# Explore Spectral Feature Extracted based on SSD

### 0) Load packages and functions

In [1]:
# Importing Python and external packages
import os
import sys
import importlib
import json
import csv
from dataclasses import dataclass, field, fields
from itertools import compress
import pandas as pd
import numpy as np

import sklearn as sk
from scipy import signal, stats

import matplotlib.pyplot as plt



In [2]:
def get_project_path_in_notebook(
    subfolder: str = '',
):
    """
    Finds path of projectfolder from Notebook.
    Start running this once to correctly find
    other modules/functions
    """
    path = os.getcwd()

    while path[-20:] != 'dyskinesia_neurophys':

        path = os.path.dirname(path)
    
    return path

In [5]:
# define local storage directories
projectpath = get_project_path_in_notebook()
codepath = os.path.join(projectpath, 'code')
figpath = os.path.join(projectpath, 'figures')
datapath = os.path.join(projectpath, 'data')
feat_path = os.path.join(projectpath, 'results', 'features')

In [4]:
os.chdir(codepath)
# own utility functions
import utils.utils_fileManagement as utilsFiles
import utils.utils_windowing as utilsWindows
from utils.utils_fileManagement import (get_project_path,
                                        load_class_pickle,
                                        save_class_pickle,
                                        mergedData,
                                        correct_acc_class)
# own data preprocessing functions
import lfpecog_preproc.preproc_data_management as dataMng
import lfpecog_preproc.preproc_filters as fltrs
# own data exploration functions
import lfpecog_features.feats_read_proc_data as read_data
import lfpecog_plotting.expl_plotting as expl_plot
import lfpecog_features.feats_spectral_baseline as specBase
import lfpecog_features.feats_spectral_features as spectral
import lfpecog_features.feats_spectral_helpers as specHelp


import lfpecog_preproc.preproc_import_scores_annotations as importClin
import lfpecog_analysis.import_ephys_results as importResults
import lfpecog_analysis.get_acc_derivs as accDerivs


from lfpecog_plotting.plotHelpers import remove_duplicate_legend

### 1) Define settings

In [228]:
WIN_LEN_sec = 10
WIN_OVERLAP_part = 0.0
ssd_path = os.path.join(feat_path, 'SSD_powers',
                        f'windows_{WIN_LEN_sec}s_'
                        f'{WIN_OVERLAP_part}overlap')
IGNORE_PTS = ['010', '014',]

LID_SCORE_INCL = 1  # from this score, features are labeled into LID+ group

In [229]:
# get all available subs with features 
SUBS = list(set([name.split('_')[1] for name in os.listdir(ssd_path)]))

for sub in IGNORE_PTS:
    SUBS.remove(sub)

First try:
- only include ECoG and ipsilateral STN LFP
- exclude moments where was only Dyskinesia in body-side ipsilateral to ECoG (NOT CORRESPONDING WITH ECoG-hemisphere)

### 1a) Load Clinical Scores

Select moments with Dyskinesia at WRONG BODYSIDE (ipsilateral to ECoG) for removal later on

In [230]:
SCORES = {}
ECOG_SIDES = {}
REMOVE_TIMES = {}  # remove moments with only 'WRONG SIDE' dyskinesia

for sub in SUBS:
        # get CDRS
        scores_temp = importClin.run_import_clinInfo(sub=sub)
                # check if scores are present
        if type(scores_temp) == type(None):
                print(f'None CDRS-scores loaded for sub {sub}')
                continue

        # get ECoG-side
        ecog_side = importClin.get_ecog_side(sub)
        ECOG_SIDES[sub] = ecog_side
        # define CDRS of body-side to include
        if ecog_side == 'left': LID_side_incl = 'right'
        elif ecog_side == 'right': LID_side_incl = 'left'
        
        # identify minutes to remove bcs only Dyskinesia at none-ECoG side
        REMOVE_TIMES[sub] = []
        for i, t in enumerate(scores_temp['dopa_time']):
                if np.logical_and(scores_temp.iloc[i][f'CDRS_total_{LID_side_incl}'] < 1,
                                scores_temp.iloc[i][f'CDRS_total_{ecog_side}'] > 0):
                        REMOVE_TIMES[sub].append(t)

        # include selected CDRS
        SCORES[sub] = scores_temp[['dopa_time', f'CDRS_total_{LID_side_incl}']]
        



### 1b) Load Features

Only include ECoG and ECoG-sided STN-LFP for now

In [231]:
FEATS = {}
for sub in SUBS:
    ecog_side = ECOG_SIDES[sub]
    # load ECog Features
    ecog_fts = pd.read_csv(os.path.join(ssd_path, f'SSDfeatures_{sub}_ecog_{ecog_side}.csv'),
                            index_col=0, header=0)
    # rename and add ECOG to ft-names
    rename_cols = {}
    for key in ecog_fts.keys(): rename_cols[key] = f'ECOG_{key}'
    ecog_fts = ecog_fts.rename(columns=rename_cols)
    
    # load ECog Features
    stn_fts = pd.read_csv(os.path.join(ssd_path, f'SSDfeatures_{sub}_lfp_{ecog_side}.csv'),
                            index_col=0, header=0)
    # rename and add STN to ft-names
    rename_cols = {}
    for key in stn_fts.keys(): rename_cols[key] = f'STN_{key}'
    stn_fts = stn_fts.rename(columns=rename_cols)

    merged_fts = pd.concat([stn_fts, ecog_fts], axis=1, ignore_index=False)
    merged_fts.index = merged_fts.index / 60  # convert to minutes to agree with CDRS score
    FEATS[sub] = merged_fts
    
    
    

### 1c) Prepare Features and Scores

Remove features to exclude and get CDRS scores to remaining features

In [232]:
# REMOVE ROWS DUE TO DYSKINESIA ONLY (!!) IN NONE-ECOG-SIDE
for sub in SUBS:
  ft_times = FEATS[sub].index
  score_times = SCORES[sub]['dopa_time']

  remove_ft_idx = []
  # select feature-rows which are closest to a CDRS-moments which should be excluded
  for ft_row, t in enumerate(ft_times):
      t_diffs = abs(score_times - t)
      i = np.argmin(t_diffs)

      if score_times[i] in REMOVE_TIMES[sub]:
        remove_ft_idx.append(ft_times[i])  
          
  FEATS[sub] = FEATS[sub].drop(remove_ft_idx, axis=0)
  print(f'removed {len(remove_ft_idx)} rows in sub-{sub}')

removed 106 rows in sub-013
removed 244 rows in sub-014
removed 0 rows in sub-016
removed 0 rows in sub-009
removed 0 rows in sub-008
removed 0 rows in sub-012


In [233]:
# DEFINE CDRS LABELS FOR FEATURE WINDOW TIMES
FT_LABELS = {}

for sub in SUBS:
    ft_times = FEATS[sub].index

    ft_scores = []

    for t in ft_times:
        t_diffs = abs(SCORES[sub]['dopa_time'] - t)
        i = np.argmin(t_diffs)
        ft_scores.append(SCORES[sub].iat[i, 1])  # take column 1, is CDRS score

    FT_LABELS[sub] = ft_scores

    assert FEATS[sub].shape[0] == len(FT_LABELS[sub]), (
        'Feature DataFrame and Ft-Labels must have same length'
    )
# no_LID_sel = np.array(ft_scores) == 0
# LID_sel = np.array(ft_scores) >= LID_SCORE_INCL


### 2) Explore

In [235]:
for sub in SUBS:
    print(f'\n{sub}')
    bp_values_list = []
    bp_keys = []

    no_LID_sel = np.array(FT_LABELS[sub]) == 0
    LID_sel = np.array(FT_LABELS[sub]) >= LID_SCORE_INCL

    print(f'# of feats w/o LID {sum(no_LID_sel)}')
    print(f'# of feats WITH LID {sum(LID_sel)}')

    for n_col, ft in enumerate(FEATS[sub].keys()):
        values = FEATS[sub].values[:, n_col]
        # split values on Dyskinesia
        no_LID_values = values[no_LID_sel]
        LID_values = values[LID_sel]
        # drop NaNs
        no_LID_values = no_LID_values[~np.isnan(no_LID_values)]
        LID_values = LID_values[~np.isnan(LID_values)]
        # define mean and std of no-LID for Z-SCORE
        m = np.nanmean(no_LID_values)
        sd = np.nanstd(no_LID_values)
        # Z-SCORE Dyskinesia values
        Z_LID_values = (LID_values - m) / sd


        #ä add feat and z-score values to lists for BOXPLOT
        bp_values_list.append(list(Z_LID_values))
        bp_keys.append(ft)

    plt.boxplot(bp_values_list)

    plt.hlines(y=0, xmin=0, xmax=24, color='k', alpha=.3)
    plt.hlines(y=[-2, 2], xmin=0, xmax=24, color='r', alpha=.3)

    plt.ylim(-5, 5)

    plt.title(sub)
    plt.close()



013
# of feats w/o LID 185
# of feats WITH LID 166

014
# of feats w/o LID 322
# of feats WITH LID 0

016
# of feats w/o LID 274
# of feats WITH LID 88

009
# of feats w/o LID 129
# of feats WITH LID 261

008
# of feats w/o LID 24
# of feats WITH LID 255

012
# of feats w/o LID 33
# of feats WITH LID 388
