# Predict CDRS based on SSD'd Spectral Features

### 0) Load packages and functions

In [None]:
# Importing Python and external packages
import os
import sys
import importlib
import json
import csv
from dataclasses import dataclass, field, fields
from itertools import compress
import pandas as pd
import numpy as np
from itertools import product
import sklearn as sk
from scipy import signal, stats

import matplotlib.pyplot as plt



In [None]:
def get_project_path_in_notebook(
    subfolder: str = '',
):
    """
    Finds path of projectfolder from Notebook.
    Start running this once to correctly find
    other modules/functions.

    Walks upwards from the current working directory until
    a path ending in 'dyskinesia_neurophys' is reached.

    Arguments:
        - subfolder: currently not used by this function,
            kept so existing calls keep working

    Returns:
        - path (str): first path, going upwards from the
            working directory, that ends in
            'dyskinesia_neurophys'

    Raises:
        - FileNotFoundError: if the filesystem root is
            reached without finding such a path
    """
    project_name = 'dyskinesia_neurophys'
    path = os.getcwd()

    while not path.endswith(project_name):
        parent = os.path.dirname(path)
        # dirname() of the filesystem root returns the root itself;
        # without this check the loop would never terminate
        if parent == path:
            raise FileNotFoundError(
                f"no '{project_name}' folder found above {os.getcwd()}"
            )
        path = parent

    return path

In [None]:
# local storage directories, all located inside the project folder
projectpath = get_project_path_in_notebook()
codepath, figpath, datapath = (
    os.path.join(projectpath, folder)
    for folder in ('code', 'figures', 'data')
)
# extracted features are stored within the results folder
feat_path = os.path.join(projectpath, 'results', 'features')

In [None]:
os.chdir(codepath)
# own utility functions
import utils.utils_fileManagement as utilsFiles
import utils.utils_windowing as utilsWindows
from utils.utils_fileManagement import (get_project_path,
                                        load_class_pickle,
                                        save_class_pickle,
                                        mergedData,
                                        correct_acc_class)
# own data preprocessing functions
import lfpecog_preproc.preproc_data_management as dataMng
import lfpecog_preproc.preproc_filters as fltrs
# own data exploration functions
import lfpecog_features.feats_read_proc_data as read_data
import lfpecog_plotting.expl_plotting as expl_plot
import lfpecog_features.feats_spectral_baseline as specBase
import lfpecog_features.feats_spectral_features as spectral
import lfpecog_features.feats_spectral_helpers as specHelp


import lfpecog_preproc.preproc_import_scores_annotations as importClin
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_analysis.import_ephys_results as importResults
import lfpecog_analysis.get_acc_task_derivs as accDerivs

import lfpecog_plotting.plotHelpers as pltHelp
from lfpecog_plotting.plotHelpers import remove_duplicate_legend

import lfpecog_analysis.load_SSD_features as load_ssdFts
import lfpecog_analysis.ft_processing_helpers as ftProc

### 0) Define settings

In [None]:
# window settings; both values are used further below to build the name
# of the folder with SSD features (windows_<WIN_LEN_sec>s_<WIN_OVERLAP_part>overlap)
WIN_LEN_sec = 10
WIN_OVERLAP_part = 0.0
# subject codes that are removed from the list of available subjects
IGNORE_PTS = ['010', ]

CDRS_THRESHOLD = .1  # from this score, features are labeled into LID+ group
# rater name, passed as cdrs_rater when the clinical scores are loaded
CDRS_RATER = 'Patricia'


In [None]:
# get all available subs with features
# TODO: CHANGE subs
ssd_path = os.path.join(feat_path, 'SSD_powers',
                        f'windows_{WIN_LEN_sec}s_'
                        f'{WIN_OVERLAP_part}overlap')
# the subject code is the second '_'-separated part of every file name;
# names without '_' cannot contain a subject code and are skipped
found_subs = {name.split('_')[1]
              for name in os.listdir(ssd_path)
              if '_' in name}
# set difference does not fail when an ignored subject has no features,
# sorting gives the same subject order in every run
SUBS = sorted(found_subs - set(IGNORE_PTS))

- only include ECoG and ipsilateral STN LFP
- exclude moments in which dyskinesia was present only on the body side ipsilateral to the ECoG (i.e. NOT corresponding to the ECoG hemisphere)

## 1) Prepare Neurophysiological and Clinical Data

for now: only work with UNILATERAL data corresponding to ECoG hemisphere

In [347]:
# Load all features and the matching clinical labels per subject
importlib.reload(ftProc)
importlib.reload(load_ssdFts)

FEATS, FT_LABELS = {}, {}

for sub in SUBS:
    # merged neurophysiology features (power and unilateral coherence)
    sub_feats = ftProc.load_feature_df_for_pred(
        sub,
        INCL_POWER=True,
        INCL_COH_UNILAT=True,
        sel_bandwidths=['all'],
        sel_source='all',
        settings_json='ftExtr_spectral_v3.json',  # v3 is broadband-flanked SSD
    )

    # clinical scores plus the selection of windows to include
    select_bool, ecog_related_cdrs = ftProc.get_idx_discardNonEcogLid(
        sub=sub,
        ft_times=sub_feats.index,
        cdrs_rater=CDRS_RATER,
    )

    # store only the selected windows and their scores
    FEATS[sub] = sub_feats.iloc[select_bool]
    FT_LABELS[sub] = ecog_related_cdrs[select_bool]

    n_removed = sum(~select_bool)
    print(f'{sub}: rows delete: {n_removed}, '
          f'shape post removal: {FEATS[sub].shape}')

# feature names are reported for the subject that was loaded last
print(f'features included: {FEATS[sub].keys()}')


load SSDd features for sub-013
load COH: STN_STN: alpha
load COH: STN_STN: lo_beta
load COH: STN_STN: hi_beta
load COH: STN_STN: gamma
load COH: STN_ECOG: alpha
load COH: STN_ECOG: lo_beta
load COH: STN_ECOG: hi_beta
load COH: STN_ECOG: gamma
	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
013: rows delete: 79, shape post removal: (275, 32)

load SSDd features for sub-014
load COH: STN_STN: alpha
load COH: STN_STN: lo_beta
load COH: STN_STN: hi_beta
load COH: STN_STN: gamma
load COH: STN_ECOG: alpha
load COH: STN_ECOG: lo_beta
load COH: STN_ECOG: hi_beta
load COH: STN_ECOG: gamma
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
014: rows delete: 74, shape post removal: (262, 32)

load SSDd features for sub-016
load COH: STN_STN: alpha
load COH: STN_STN: lo_beta
load COH: STN_STN: hi_beta
load COH: STN

### 2) Prepare prediction arrays


In [None]:
import lfpecog_predict.prepare_predict_arrays as prep_pred_arrs

In [348]:
# Create arrays per subject based on features and labels

importlib.reload(prep_pred_arrs)

# the LID threshold is taken from the settings cell instead of being
# repeated as a literal, so the setting and the labels cannot diverge
(X_total, y_total_binary,
 y_total_scale, sub_ids_total,
 ft_times_total, ft_names) = prep_pred_arrs.get_group_arrays_for_prediction(
    feat_dict=FEATS,
    label_dict=FT_LABELS,
    CDRS_THRESHOLD=CDRS_THRESHOLD,
    TO_PLOT = False)

# Merge subject-arrays to one group array for prediction
(X_all, y_all_binary,
 y_all_scale, sub_ids,
 ft_times_all) = prep_pred_arrs.merge_group_arrays(X_total=X_total,
                                    y_total_binary=y_total_binary,
                                    y_total_scale=y_total_scale,
                                    sub_ids_total=sub_ids_total,
                                    ft_times_total=ft_times_total)

(1912, 32) (1912,) (1912,) (1912,) (1912,)
out of n=1912 samples, n=1043 are Dyskinesia (54.6 %)


### 3) Explore feature values

In [None]:
import seaborn as sns
from lfpecog_plotting.plotHelpers import get_colors


In [None]:
def get_violin_ft_data(X_all, y_all_binary, ft_names):
    """
    Reshape the feature array into a long-format DataFrame
    for a violin plot, and test every feature for a
    difference between the two label groups.

    Arguments:
        - X_all: 2d array, indexed as [sample, feature]
        - y_all_binary: array with one 0/1 label per sample
        - ft_names: one name per feature (column of X_all)

    Returns:
        - violin_df: DataFrame with columns 'feature',
            'values' and 'lid'; contains all samples of
            the first feature, then all samples of the
            second feature, etc.
        - violin_ps: list with per feature the p-value of
            a Mann-Whitney-U test between samples labeled
            0 and samples labeled 1
    """
    n_samples, n_feats = X_all.shape

    # long format: every feature name and the full label vector are
    # repeated so they line up with the feature-wise flattened values
    long_df = pd.DataFrame({
        'feature': np.repeat(np.array(list(ft_names)), n_samples),
        'values': X_all.T.ravel(),
        'lid': np.tile(np.ravel(y_all_binary), n_feats),
    })

    # group masks are the same for every feature
    is_neg = y_all_binary == 0
    is_pos = y_all_binary == 1
    p_values = [stats.mannwhitneyu(ft_col[is_neg], ft_col[is_pos])[1]
                for ft_col in X_all.T]

    print(long_df.shape, len(p_values))

    return long_df, p_values

In [350]:
# Create violinplot LID vs no-LID
# (one split violin per feature; features that differ significantly
# between the groups are drawn opaque and get a bold tick label)
violin_df, violin_ps = get_violin_ft_data(X_all, y_all_binary, ft_names)

fsize=24
# Bonferroni correction: significance level divided by number of features
ALPHA = .01
ALPHA /= len(ft_names)

fig, ax = plt.subplots(1, 1, figsize=(24, 12))

sns.set_theme(style="whitegrid")
clrs = list(get_colors().values())

# Draw a nested violinplot and split the violins for easier comparison
violin = sns.violinplot(data=violin_df,
               x="feature", y="values",
               hue="lid",
               split=True,
               inner="quartile",
               linewidth=1,
               ax=ax,
               palette={0: clrs[4], 1: clrs[1]})

# change transparency based on sign differences
double_ps = []  # violin.collections describes every half-violin, therefore double the p-values
for p in violin_ps: double_ps.extend([p, p])
for body, p_ft in zip(violin.collections, double_ps):
    
    if p_ft < ALPHA: body.set_alpha(1)
    else: body.set_alpha(.3)

# set properties quartile lines
for l in violin.lines:
    l.set_linestyle('--')
    l.set_linewidth(0.6)
    l.set_color('k')
    l.set_alpha(0.8)
# every third line, starting at the second, is restyled as a thick solid
# line (presumably the median of each half-violin — TODO confirm)
for l in violin.lines[1::3]:
    l.set_linestyle('-')
    l.set_linewidth(2)
    l.set_color('black')
    l.set_alpha(0.8)

sns.despine(left=True)

ax.set_ylim(-5, 5)
ax.set_xticks(np.arange(len(ft_names)))
# NOTE(review): readable_ftnames is not defined or imported in the visible
# part of this notebook — confirm where it comes from
temp_ft_names = readable_ftnames(ft_names)
ax.set_xticklabels(temp_ft_names, rotation=60, ha='right',
                   size=fsize)
# bold tick labels for features below the corrected alpha
for i_tick, p in enumerate(violin_ps):
    if p < ALPHA: ax.get_xticklabels()[i_tick].set_weight('bold')
ax.set_ylabel('z-scored values (a.u.)', size=fsize + 8)
ax.set_xlabel('')
plt.tick_params(axis='both', size=fsize, labelsize=fsize+2)

# replace the legend labels by readable group names
h, l = ax.get_legend_handles_labels()
l = ['no LID', 'LID']
ax.legend(h, l, fontsize=fsize+8, frameon=False,
          ncol=2, loc='lower right')
ax.set_title('Feature differences (10-s windows): without-LID versus with-LID'
             f'   (Bonf. corrected alpha = {round(ALPHA, 4)})',
             size=fsize + 8, weight='bold')

plt.tight_layout()

# figure is written to the 'prediction' subfolder of figpath
fname = f'violinFeats_powCoh_binaryLID_signs_broadbandSSD'
plt.savefig(os.path.join(figpath, 'prediction', fname),
            facecolor='w', dpi=300,)

plt.close()


(61184, 3) 32


### 4) Prediction

In [215]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import LeaveOneGroupOut

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# performance
from sklearn.metrics import (
    classification_report, accuracy_score,
    confusion_matrix, plot_confusion_matrix, ConfusionMatrixDisplay,
    auc, roc_curve, RocCurveDisplay
)

In [None]:
import lfpecog_predict.predict_helpers as predHelpers
import lfpecog_plotting.plot_pred_standards as plotPred

Loop over predictions with different features

In [291]:
# Run leave-one-subject-out LDA predictions for every combination of
# feature source and bandwidth; AUC and accuracy are stored per subject
# (total_sub_*) and for the merged group predictions (total_group_*),
# each as dict[source][bandwidth]
importlib.reload(ftProc)

total_sub_AUC, total_sub_acc = {}, {}
total_group_AUC, total_group_acc = {}, {}

feat_sources = ['all', 'lfp', 'ecog']
feat_bwidths = ['all', 'alpha', 'lo_beta', 'hi_beta', 'gamma']

# LOOP OVER BANDWIDTHS AND FT-SOURCES TO INCLUDE
for source in feat_sources:
    total_sub_AUC[source], total_sub_acc[source] = {}, {}
    total_group_AUC[source], total_group_acc[source] = {}, {}

    for bw in feat_bwidths:
        print(f'start {source.upper()}, {bw.upper()}')
        temp_FEATS = {}
        temp_LABELS = {}
        # get specified features per subject
        for sub in SUBS:
            # LOAD MERGED NEUROPHYS FEATURES
            temp_FEATS[sub] = ftProc.load_feature_df_for_pred(
                sub, INCL_POWER = True, INCL_COH_UNILAT = True,
                sel_bandwidths=[bw],
                sel_source=source,
                # TODO: ADD FT-EXTRACTION JSON
            )
            # LOAD CLINICAL SCORES AND CORRESPONDING WINDOW SELECTION
            select_bool, ecog_related_cdrs = ftProc.get_idx_discardNonEcogLid(
                sub=sub,
                ft_times=temp_FEATS[sub].index,
                cdrs_rater=CDRS_RATER,
            )
            # select features and clinical scores to include
            temp_FEATS[sub] = temp_FEATS[sub].iloc[select_bool]
            temp_LABELS[sub] = ecog_related_cdrs[select_bool]

        # Create arrays per subject; the LID threshold comes from the
        # settings cell instead of a repeated literal
        (X_total, y_total_binary,
         y_total_scale, sub_ids_total,
         ft_times_total, ft_names) = prep_pred_arrs.get_group_arrays_for_prediction(
            feat_dict=temp_FEATS,
            label_dict=temp_LABELS,
            CDRS_THRESHOLD=CDRS_THRESHOLD,
            TO_PLOT = False)

        # Merge subject-arrays to one group array for prediction
        (X_all, y_all_binary,
         y_all_scale, sub_ids,
         ft_times_all) = prep_pred_arrs.merge_group_arrays(
            X_total=X_total, y_total_binary=y_total_binary,
            y_total_scale=y_total_scale, sub_ids_total=sub_ids_total,
            ft_times_total=ft_times_total)

        # REAL PREDICTIONS returned per Subject
        preds_subs, importances_sub = predHelpers.perform_prediction(
            X=X_all.copy(),
            y=y_all_binary.copy(),
            groups=sub_ids.ravel(),
            cv_method=LeaveOneGroupOut,
            clf_method='lda',
            perform_random_perm = False,
            n_perms = 0,
            verbose = False,
            return_dict_per_sub=True
        )
        temp_auc_scores, temp_acc_scores = [], []
        for sub in preds_subs.keys():

            # second column of 'proba' is used as score for the LID class
            fpr, tpr, _ = roc_curve(preds_subs[sub]['true'],
                                    preds_subs[sub]['proba'][:, 1],)
            auc_score = round(auc(fpr, tpr), 3)
            acc_score = accuracy_score(preds_subs[sub]['true'],
                                       preds_subs[sub]['pred'])

            temp_auc_scores.append(auc_score)
            temp_acc_scores.append(acc_score)

        total_sub_AUC[source][bw] = temp_auc_scores
        total_sub_acc[source][bw] = temp_acc_scores

        # REAL PREDICT as one merged group
        y_true_all, y_pred_all, y_pred_conf_all, importances = predHelpers.perform_prediction(
            X=X_all.copy(),
            y=y_all_binary.copy(),
            groups=sub_ids.ravel(),
            cv_method=LeaveOneGroupOut,
            clf_method='lda',
            perform_random_perm = False,
            n_perms = 0,
            verbose = True,
            ft_names=ft_names
        )
        fpr, tpr, _ = roc_curve(y_true_all,
                                y_pred_conf_all)
        auc_score = round(auc(fpr, tpr), 3)
        # group accuracy has to be based on the merged group predictions;
        # using preds_subs[sub] here would only score the subject that
        # was handled last in the per-subject loop above
        acc_score = accuracy_score(y_true_all, y_pred_all)
        total_group_AUC[source][bw] = auc_score
        total_group_acc[source][bw] = acc_score

    

start ALL, ALL

load SSDd features for sub-013
	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	sub-016, MERGED FEATS SHAPE INCLUDED: (362, 32)

load SSDd features for sub-008
	sub-008, POWER FEATS SHAPE INCLUDED: (279, 24)
	sub-008, COH FEATS SHAPE INCLUDED: (271, 8)
	sub-008, MERGED FEATS SHAPE INCLUDED: (279, 32)

load SSDd features for sub-009
	sub-009, POWER FEATS SHAPE INCLUDED: (390, 24)
	sub-009, COH FEATS SHAPE INCLUDED: (380, 8)
	sub-009, MERGED FEATS SHAPE INCLUDED: (390, 32)

load SSDd features for sub-012
	sub-012, POWER FEATS SHAPE INCLUDED: (421, 24)
	sub-012, COH FEATS SHAPE INCLUDED:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



 ######## FOLD 0 ########
	fold tests sub-008
	# of samples: train 1641, test 271
              precision    recall  f1-score   support

           0       0.64      0.78      0.70        89
           1       0.88      0.79      0.83       182

    accuracy                           0.78       271
   macro avg       0.76      0.78      0.76       271
weighted avg       0.80      0.78      0.79       271


 ######## FOLD 1 ########
	fold tests sub-009
	# of samples: train 1532, test 380
              precision    recall  f1-score   support

           0       0.53      0.85      0.65       128
           1       0.89      0.62      0.73       252

    accuracy                           0.69       380
   macro avg       0.71      0.73      0.69       380
weighted avg       0.77      0.69      0.70       380


 ######## FOLD 2 ########
	fold tests sub-012
	# of samples: train 1515, test 397
              precision    recall  f1-score   support

           0       0.18      0.88      0.3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['lo_beta'] selection: (354, 8)

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['lo_beta'] selection: (336, 8)

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	sub-016, MERGED FEATS SHAPE INCLUDED: (362, 32)
	sub-016, MERGED FEATS SHAPE after ['lo_beta'] selection: (362, 8)

load SSDd features for sub-008
	sub-008, POWER FEATS SHAPE INCLUDED: (279, 24)
	sub-008, COH FEATS SHAPE INCLUDED: (271, 8)
	sub-008, MERGED FEATS SHAPE INCLUDED: (279, 32)
	sub-008, MERGED FEATS SHAPE after ['lo_beta'] selection: (279, 8)

load SSDd features for sub-009
	sub-009, POWER FEATS SHAPE INCLUDED

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['hi_beta'] selection: (354, 8)

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['hi_beta'] selection: (336, 8)

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	sub-016, MERGED FEATS SHAPE INCLUDED: (362, 32)
	sub-016, MERGED FEATS SHAPE after ['hi_beta'] selection: (362, 8)

load SSDd features for sub-008
	sub-008, POWER FEATS SHAPE INCLUDED: (279, 24)
	sub-008, COH FEATS SHAPE INCLUDED: (271, 8)
	sub-008, MERGED FEATS SHAPE INCLUDED: (279, 32)
	sub-008, MERGED FEATS SHAPE after ['hi_beta'] selection: (279, 8)

load SSDd features for sub-009
	sub-009, POWER FEATS SHAPE INCLUDED

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['gamma'] selection: (354, 8)

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['gamma'] selection: (336, 8)

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	sub-016, MERGED FEATS SHAPE INCLUDED: (362, 32)
	sub-016, MERGED FEATS SHAPE after ['gamma'] selection: (362, 8)

load SSDd features for sub-008
	sub-008, POWER FEATS SHAPE INCLUDED: (279, 24)
	sub-008, COH FEATS SHAPE INCLUDED: (271, 8)
	sub-008, MERGED FEATS SHAPE INCLUDED: (279, 32)
	sub-008, MERGED FEATS SHAPE after ['gamma'] selection: (279, 8)

load SSDd features for sub-009
	sub-009, POWER FEATS SHAPE INCLUDED: (390, 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after lfp selection: (354, 12)
	Included feats for lfp: Index(['lfp_right_alpha_max_psd', 'lfp_right_alpha_mean_psd',
       'lfp_right_alpha_variation', 'lfp_right_lo_beta_max_psd',
       'lfp_right_lo_beta_mean_psd', 'lfp_right_lo_beta_variation',
       'lfp_right_hi_beta_max_psd', 'lfp_right_hi_beta_mean_psd',
       'lfp_right_hi_beta_variation', 'lfp_right_narrow_gamma_max_psd',
       'lfp_right_narrow_gamma_mean_psd', 'lfp_right_narrow_gamma_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after lfp selection: (336, 12)
	Included feats for lfp: Index(['lfp_right_alpha_max_psd', 'lfp_right_alpha_mean_psd',
       'lfp_right_

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['alpha'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after lfp selection: (354, 3)
	Included feats for lfp: Index(['lfp_right_alpha_max_psd', 'lfp_right_alpha_mean_psd',
       'lfp_right_alpha_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['alpha'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after lfp selection: (336, 3)
	Included feats for lfp: Index(['lfp_right_alpha_max_psd', 'lfp_right_alpha_mean_psd',
       'lfp_right_alpha_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	sub-016, MERGED FE

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['lo_beta'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after lfp selection: (354, 3)
	Included feats for lfp: Index(['lfp_right_lo_beta_max_psd', 'lfp_right_lo_beta_mean_psd',
       'lfp_right_lo_beta_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['lo_beta'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after lfp selection: (336, 3)
	Included feats for lfp: Index(['lfp_right_lo_beta_max_psd', 'lfp_right_lo_beta_mean_psd',
       'lfp_right_lo_beta_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	su

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['hi_beta'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after lfp selection: (354, 3)
	Included feats for lfp: Index(['lfp_right_hi_beta_max_psd', 'lfp_right_hi_beta_mean_psd',
       'lfp_right_hi_beta_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['hi_beta'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after lfp selection: (336, 3)
	Included feats for lfp: Index(['lfp_right_hi_beta_max_psd', 'lfp_right_hi_beta_mean_psd',
       'lfp_right_hi_beta_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	su

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['gamma'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after lfp selection: (354, 3)
	Included feats for lfp: Index(['lfp_right_narrow_gamma_max_psd', 'lfp_right_narrow_gamma_mean_psd',
       'lfp_right_narrow_gamma_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['gamma'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after lfp selection: (336, 3)
	Included feats for lfp: Index(['lfp_right_narrow_gamma_max_psd', 'lfp_right_narrow_gamma_mean_psd',
       'lfp_right_narrow_gamma_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SH

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, MERGED FEATS SHAPE after ecog selection: (354, 12)
	Included feats for ecog: Index(['ecog_right_alpha_max_psd', 'ecog_right_alpha_mean_psd',
       'ecog_right_alpha_variation', 'ecog_right_lo_beta_max_psd',
       'ecog_right_lo_beta_mean_psd', 'ecog_right_lo_beta_variation',
       'ecog_right_hi_beta_max_psd', 'ecog_right_hi_beta_mean_psd',
       'ecog_right_hi_beta_variation', 'ecog_right_narrow_gamma_max_psd',
       'ecog_right_narrow_gamma_mean_psd',
       'ecog_right_narrow_gamma_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ecog selection: (336, 12)
	Included feats for ecog: Index(['ecog_right_alpha_max_psd', 'ecog_right_alpha_mean_psd',
       'ecog_right_alpha_variation', 'ecog_right_lo_beta_max_psd',
       'ecog_right_lo_beta_mean_psd', 'ecog_right_lo_beta_variation'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['alpha'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after ecog selection: (354, 3)
	Included feats for ecog: Index(['ecog_right_alpha_max_psd', 'ecog_right_alpha_mean_psd',
       'ecog_right_alpha_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['alpha'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after ecog selection: (336, 3)
	Included feats for ecog: Index(['ecog_right_alpha_max_psd', 'ecog_right_alpha_mean_psd',
       'ecog_right_alpha_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (355, 8)
	sub-016,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['lo_beta'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after ecog selection: (354, 3)
	Included feats for ecog: Index(['ecog_right_lo_beta_max_psd', 'ecog_right_lo_beta_mean_psd',
       'ecog_right_lo_beta_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['lo_beta'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after ecog selection: (336, 3)
	Included feats for ecog: Index(['ecog_right_lo_beta_max_psd', 'ecog_right_lo_beta_mean_psd',
       'ecog_right_lo_beta_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['hi_beta'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after ecog selection: (354, 3)
	Included feats for ecog: Index(['ecog_right_hi_beta_max_psd', 'ecog_right_hi_beta_mean_psd',
       'ecog_right_hi_beta_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['hi_beta'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after ecog selection: (336, 3)
	Included feats for ecog: Index(['ecog_right_hi_beta_max_psd', 'ecog_right_hi_beta_mean_psd',
       'ecog_right_hi_beta_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, COH FEATS SHAPE INCLUDED: (3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


	sub-013, POWER FEATS SHAPE INCLUDED: (354, 24)
	sub-013, COH FEATS SHAPE INCLUDED: (340, 8)
	sub-013, MERGED FEATS SHAPE INCLUDED: (354, 32)
	sub-013, MERGED FEATS SHAPE after ['gamma'] selection: (354, 8)
	sub-013, MERGED FEATS SHAPE after ecog selection: (354, 3)
	Included feats for ecog: Index(['ecog_right_narrow_gamma_max_psd', 'ecog_right_narrow_gamma_mean_psd',
       'ecog_right_narrow_gamma_variation'],
      dtype='object')

load SSDd features for sub-014
	sub-014, POWER FEATS SHAPE INCLUDED: (336, 24)
	sub-014, COH FEATS SHAPE INCLUDED: (332, 8)
	sub-014, MERGED FEATS SHAPE INCLUDED: (336, 32)
	sub-014, MERGED FEATS SHAPE after ['gamma'] selection: (336, 8)
	sub-014, MERGED FEATS SHAPE after ecog selection: (336, 3)
	Included feats for ecog: Index(['ecog_right_narrow_gamma_max_psd', 'ecog_right_narrow_gamma_mean_psd',
       'ecog_right_narrow_gamma_variation'],
      dtype='object')

load SSDd features for sub-016
	sub-016, POWER FEATS SHAPE INCLUDED: (362, 24)
	sub-016, CO

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# CHECK DIFFERENCE LFP AND ALL PREDICTIVE OUTCOMES

In [298]:
# Grouped bar chart of the group-level AUCs: one group of bars per
# feature selection (x-axis), one bar per data source within a group.
clrs = list(get_colors().values())
fsize = 18
fig, ax = plt.subplots(1, 1, figsize=(12, 4))

n_selections = len(feat_bwidths)
x_base = np.arange(n_selections)

# per source: key in total_group_AUC, x-shift of its bars,
# hatch pattern, and legend label
source_layout = (
    ('all', -.3, None, 'STN LFP + ECoG'),
    ('ecog', 0, '*', 'ECoG only'),
    ('lfp', .3, '//', 'STN LFP only'),
)

for src_key, x_shift, src_hatch, src_label in source_layout:
    # AUC bars, colored per feature selection
    ax.bar(x_base + x_shift,
           list(total_group_AUC[src_key].values()),
           color=clrs[:n_selections],
           width=.25, alpha=.75, align='center',
           hatch=src_hatch,)
    # gray proxy bar with negative height: falls outside the y-limits
    # set below and only provides a color-neutral legend entry
    ax.bar([0], [-1], color='gray', alpha=.8,
           label=src_label, hatch=src_hatch,)

# axes cosmetics
ax.set_ylim(0, 1)
ax.set_ylabel('Area Under ROC (a.u.)', fontsize=fsize,)
ax.set_xlabel('Feature selection', fontsize=fsize,)
ax.set_xticks(x_base)
ax.set_xticklabels(feat_bwidths, fontsize=fsize)

ax.legend(fontsize=fsize, ncol=3)
ax.set_title(
    'Predictive performance: different feature'
    ' frequencies and sources',
    fontsize=fsize+4, weight='bold',
)

plt.tick_params(axis='both', size=fsize, labelsize=fsize)
plt.tight_layout()

# store figure in the prediction-subfolder of the figure directory
figname = 'LID_binary_differentFeatSources_AUCs'
plt.savefig(
    os.path.join(figpath, 'prediction', figname),
    facecolor='w', dpi=300,
)

plt.close()

In [235]:
# report the collected AUCs: first per subject, then on group level
print(
    f'AUCs per subject: {total_sub_AUC}',
    f'AUCs for group: {total_group_AUC}',
    sep='\n',
)

AUCs per subject: {'all': [0.846, 0.808, 0.79, 0.72, nan, 0.582], 'alpha': [0.596, 0.695, 0.656, 0.525, nan, 0.538], 'lo_beta': [0.754, 0.852, 0.779, 0.705, nan, 0.563], 'hi_beta': [0.469, 0.393, 0.427, 0.586, nan, 0.481], 'gamma': [0.838, 0.537, 0.695, 0.576, nan, 0.548]}
AUCs for group: {'all': 0.725, 'alpha': 0.509, 'lo_beta': 0.715, 'hi_beta': 0.259, 'gamma': 0.515}


Perform single 'best' prediction

In [351]:
# reload to pick up local changes in the prediction helpers
importlib.reload(predHelpers)

# Real (non-permuted) LDA prediction; LeaveOneGroupOut is passed as
# cross-validation method with the subject ids as groups
(y_true_all,
 y_pred_all,
 y_pred_conf_all,
 importances) = predHelpers.perform_prediction(
    X=X_all.copy(),
    y=y_all_binary.copy(),
    groups=sub_ids.ravel(),
    clf_method='lda',
    cv_method=LeaveOneGroupOut,
    n_perms=0,
    perform_random_perm=False,
    ft_names=ft_names,
    verbose=True,
)


 ######## FOLD 0 ########
	fold tests sub-008
	# of samples: train 1641, test 271
              precision    recall  f1-score   support

           0       0.59      0.84      0.69        89
           1       0.90      0.71      0.80       182

    accuracy                           0.76       271
   macro avg       0.75      0.78      0.75       271
weighted avg       0.80      0.76      0.76       271


 ######## FOLD 1 ########
	fold tests sub-009
	# of samples: train 1532, test 380
              precision    recall  f1-score   support

           0       0.76      0.84      0.80       128
           1       0.92      0.87      0.89       252

    accuracy                           0.86       380
   macro avg       0.84      0.85      0.84       380
weighted avg       0.86      0.86      0.86       380


 ######## FOLD 2 ########
	fold tests sub-012
	# of samples: train 1515, test 397
              precision    recall  f1-score   support

           0       0.22      0.91      0.3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


plot mean feature importances

In [311]:
def readable_ftnames(ft_names):
    """
    Shorten feature names for use as plot labels.

    Per name: 'ecog_right' is shortened to 'ecog' if the name
    contains 'ecog'; otherwise 'lfp_right' is shortened to 'lfp'
    if the name contains 'lfp' (only one of both is applied).
    'narrow_gamma' is renamed to 'gamma' and the result is
    converted to upper case.

    Returns a new list, the input is left untouched.
    """
    def _shorten(name):
        if 'ecog' in name:
            name = name.replace('ecog_right', 'ecog')
        elif 'lfp' in name:
            name = name.replace('lfp_right', 'lfp')
        # replace() leaves names without 'narrow_gamma' unchanged
        return name.replace('narrow_gamma', 'gamma').upper()

    return [_shorten(ft) for ft in ft_names]

In [312]:
# Bar plot of the feature importances, averaged over axis 0 of
# `importances` and sorted in ascending order
fig, ax = plt.subplots(1, 1, figsize=(18, 12))
fsize = 20

mean_importances = np.mean(importances, axis=0)
sort_idx = np.argsort(mean_importances)

# shortened, upper-case names in the same (sorted) order as the bars
temp_ftnames = readable_ftnames(ft_names)
sorted_ftnames = np.array(temp_ftnames)[sort_idx]

ax.bar(
    np.arange(importances.shape[1]),
    mean_importances[sort_idx],
)

ax.set_xticks(np.arange(len(ft_names)))
ax.set_xticklabels(
    sorted_ftnames,
    rotation=60, ha='right', size=fsize,
)
ax.set_ylabel('importances (a.u.)', size=fsize + 8)
ax.set_xlabel('')

plt.tick_params(axis='both', size=fsize, labelsize=fsize+2)
plt.tight_layout()

# saving is currently disabled; uncomment to store the figure
fname = 'binaryLID_pred_ftImportances_powCoh_lda'
# plt.savefig(os.path.join(figpath, 'prediction', fname),
#             facecolor='w', dpi=300,)

plt.close()

In [352]:
# reload to pick up local changes in the prediction helpers
importlib.reload(predHelpers)

# Permutation runs (500) with the same data, classifier and
# cross-validation as the real prediction; ROC coordinates are
# requested as return values (perm_return_ROC)
perm_tpr, perm_fpr = predHelpers.perform_prediction(
    X=X_all.copy(),
    y=y_all_binary.copy(),
    groups=sub_ids.ravel(),
    clf_method='lda',
    cv_method=LeaveOneGroupOut,
    n_perms=500,
    perform_random_perm=True,
    perm_return_ROC=True,
    verbose=False,
)

Plot AUROC

In [354]:
# ROC figure: every permutation as a thin black line, the real
# prediction in green, and the diagonal chance level in orange
auc_perms = []

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
fs = 18

# draw permutation curves and collect their AUCs
for perm_x, perm_y in zip(perm_fpr, perm_tpr):
    ax.plot(perm_x, perm_y, alpha=.2, lw=.5, c='k',)
    auc_perms.append(auc(perm_x, perm_y))

# 99th percentile of the permutation AUCs
alpha01 = np.percentile(auc_perms, 99)

# ROC of the real prediction
fpr, tpr, _ = roc_curve(y_true_all, y_pred_conf_all,)
auc_score = round(auc(fpr, tpr), 2)

pred_label = f'Prediction\n(AUC: {auc_score})'
perm_label = f'Permutations (n=500)\nalpha 0.01: {round(alpha01, 2)})'

ax.plot(fpr, tpr, c='darkgreen', lw=2, label=pred_label,)
# single point at the origin, only creates the legend entry
# for the permutation curves
ax.plot(0, 0, c='k', label=perm_label)
ax.plot([0, 1], [0, 1], lw=3, c='orange',
        label='Chance level (50/50)')

for set_axis_label, axis_text in (
    (ax.set_xlabel, 'False Positive Rate'),
    (ax.set_ylabel, 'True Positive Rate'),
):
    set_axis_label(axis_text, fontsize=fs, weight='bold',)

ax.set_title(
    'Dyskinesia Prediction - Receiver Operator Curve'
    '\nLeave-One-Subject-Out cross-validation',
    fontsize=fs,
)

ax.legend(frameon=False, fontsize=fs, loc='lower right')
plt.tick_params(axis='both', labelsize=fs)
plt.tight_layout()

# saving is currently disabled; uncomment to store the figure
fname = 'Group_LID_PRED_LDA_PowCoh' + '_broadbSSD'
# plt.savefig(os.path.join(figpath, 'prediction', fname),
#             facecolor='w', dpi=300,)

plt.close()


In [355]:
importlib.reload(plotPred)

# Group-level summary of the Leave-One-Subject-Out predictions

# precision / recall / f1 per class
report = classification_report(y_true_all, y_pred_all)
print(report)

# confusion matrix (plotting currently disabled)
cm_figname = 'Group_LID_Pred_LDA_powCoh_confMatrix'
cm = confusion_matrix(y_true_all, y_pred_all)
# plotPred.plot_confMatrix(cm, fig_path=figpath, fig_name=cm_figname,
#                          to_show=False, to_save=True)

# Receiver Operator Curve, area under it, and accuracy
fpr, tpr, _ = roc_curve(y_true_all, y_pred_conf_all,)
auc_score = auc(fpr, tpr)
acc_score = accuracy_score(y_true_all, y_pred_all)
print(
    f'AUC: {round(auc_score, 3)}, Accuracy: {round(acc_score, 3)}'
)
# roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()


              precision    recall  f1-score   support

           0       0.68      0.77      0.72       869
           1       0.79      0.69      0.74      1043

    accuracy                           0.73      1912
   macro avg       0.73      0.73      0.73      1912
weighted avg       0.74      0.73      0.73      1912

AUC: 0.778, Accuracy: 0.73


Show individual prediction course

In [356]:
# reload to pick up local changes in the prediction helpers
importlib.reload(predHelpers)

# Real (non-permuted) LDA prediction, with the results requested
# as a dict per subject (return_dict_per_sub)
preds_subs, importances_sub = predHelpers.perform_prediction(
    X=X_all.copy(),
    y=y_all_binary.copy(),
    groups=sub_ids.ravel(),
    clf_method='lda',
    cv_method=LeaveOneGroupOut,
    n_perms=0,
    perform_random_perm=False,
    return_dict_per_sub=True,
    verbose=False,
)

In [358]:
# Per subject (one row each): shaded background for the predicted binary
# class per feature window, with the full-scale CDRS drawn on top.
# Optionally the predicted probability is added on a second y-axis.
PLOT_PROBA = False
clrs = list(get_colors().values())

fig, axes = plt.subplots(len(SUBS), 1, figsize=(8, 12))
fs = 14
for i_s, sub in enumerate(SUBS):
    # Legend entries get their own names: the notebook-level name `labels`
    # holds the accelerometer labels per subject in the movement-analysis
    # cells and must not be overwritten by re-running this cell.
    # The lists are reset per subject, so the legend created after the
    # loop contains the entries of the last subject only.
    legend_handles, legend_labels = [], []

    plot_preds = preds_subs[sub]['pred']
    if PLOT_PROBA: plot_probas = preds_subs[sub]['proba'][:, 1]
    # select scores and feature times belonging to this subject
    sub_sel = sub_ids == sub
    plot_cdrs = y_all_scale[sub_sel]  # get CDRS as full scale
    plot_fttimes = ft_times_all[sub_sel]
    assert len(plot_preds) == len(plot_cdrs), (
        '# predictions and # scores not equal'
    )

    # upper limit of the shaded areas; use 1 if CDRS is zero throughout
    ymax = max(plot_cdrs)
    if ymax == 0: ymax = 1
    
    # fill moments where LID was predicted
    axes[i_s].fill_between(plot_fttimes,
                           y1=-0, y2=ymax,
                           where=plot_preds == 1, alpha=.4,
                           color=clrs[1],
                           label='LID predicted')
    # fill moments where NO LID was predicted
    axes[i_s].fill_between(plot_fttimes,
                           y1=-0, y2=ymax,
                           color=clrs[4],
                           where=plot_preds == 0, alpha=.4,
                           label='No LID predicted')
    
    # plot probabilities of prediction
    if PLOT_PROBA:
        ax2 = axes[i_s].twinx()  # create second y-axis for probabilities
        ax2.plot(plot_fttimes, plot_probas, lw=.8, color='purple',
                alpha=.8, label='Predicted probability')
        ax2.set_ylim(0, 1)
        ax2.set_ylabel('Predicted\nprobability', fontsize=fs, weight='bold',)
        ax2.tick_params(axis='both', labelsize=fs, size=fs,)
        for side in ['top',]:
            ax2.spines[side].set_visible(False)
        hnd, lab = ax2.get_legend_handles_labels()
        legend_handles.extend(list(hnd))
        legend_labels.extend(list(lab))

    # plot CDRS as full scale
    axes[i_s].plot(plot_fttimes, plot_cdrs, lw=3, color='green',
                      label='Real CDRS (unilat.)')

    axes[i_s].set_title(f'sub-{sub}', weight='bold', fontsize=fs)
    axes[i_s].set_xlabel('Time (minutes vs L-Dopa intake)',
                         fontsize=fs, )
    axes[i_s].set_ylabel('Dyskinesia\n(CDRS)',
                         fontsize=fs, weight='bold',)
    hnd, lab = axes[i_s].get_legend_handles_labels()
    legend_handles.extend(list(hnd))
    legend_labels.extend(list(lab))

# one shared legend, placed above the first subplot
axes[0].legend(legend_handles, legend_labels, frameon=False,
            loc='lower center', bbox_to_anchor=(.5, 1.2),
            fancybox=False, shadow=False,
            borderaxespad=1, ncol=3,
            prop={
                # 'weight': 'bold',
                'size': fs
            }
)

# plt.suptitle('Individual binary Dyskinesia-Predictions vs CDRS',
#             #  weight='bold',
#              fontsize=fs+4)

for ax in axes:
    ax.tick_params(axis='both', labelsize=fs, size=fs,)
    for side in ['top','right']:
        ax.spines[side].set_visible(False)
plt.tight_layout()

# saving is currently disabled; uncomment to store the figure
fname = f'Indiv_binLID_predict_PowCoh_vs_CDRSscale_{CDRS_RATER}'
# fname += '_broadbSSD'
# plt.savefig(os.path.join(figpath, 'prediction', fname),
#             facecolor='w', dpi=300,)
plt.close()

#### Analyse Movement percentages

In [None]:
importlib.reload(accDerivs)

# Load, per subject in FT_LABELS, the two outputs of load_acc_and_task
# (data version v3.0, resampled to 500): stored in `accs` and `labels`
accs = {}
labels = {}
for sub in FT_LABELS.keys():
    print(f'start sub {sub}')
    sub_acc, sub_labels = accDerivs.load_acc_and_task(
        sub=sub,
        dataversion='v3.0',
        resample_freq=500,
    )
    accs[sub] = sub_acc
    labels[sub] = sub_labels

Plot polar plot movement percentages in binary Groups

In [None]:

# Per subject: percentage of samples labelled as tap or move (body side
# opposite to the ECoG side) within every feature window. Results are
# stored in ACT and plotted together with the true binary LID labels.
ACT = {}

fig, axes = plt.subplots(len(FEATS.keys()), 1,
                         figsize=(8, len(FEATS.keys()) * 2))

for i_s, sub in enumerate(FEATS.keys()):

    sub_preds = preds_subs[sub]['pred']
    # if PLOT_PROBA: plot_probas = preds_subs[sub]['proba'][:, 1]
    # select labels and times for sub (included in prediction)
    sub_sel = sub_ids == sub
    sub_cdrs = y_all_scale[sub_sel]  # get CDRS as full scale
    sub_LID = y_all_binary[sub_sel]  # get binary LID
    sub_fttimes = ft_times_all[sub_sel]

    # get accelerometer info: use the body side opposite to the ECoG side
    ecog_side = importClin.get_ecog_side(sub=sub)
    if ecog_side == 'right': body_side = 'left'
    elif ecog_side == 'left': body_side = 'right'
    else:
        # without this guard, body_side of the previous subject
        # would silently be re-used
        raise ValueError(
            f'unexpected ecog side for sub {sub}: {ecog_side!r}'
        )

    acc_sub = []  # list to store

    # label columns and time index are identical for all windows
    act_columns = [f'{body_side}_tap', f'{body_side}_move']
    label_times = labels[sub].index.values

    for t in sub_fttimes:
        t = t * 60  # convert to seconds for acc-data
        # samples falling inside the window starting at t
        idx_sel = np.logical_and(label_times > t,
                                 label_times < (t + WIN_LEN_sec))
        act = labels[sub][idx_sel][act_columns]
        # percentage of samples with the row-wise maximum of both
        # labels set (assumes labels are coded 0/1 - TODO confirm)
        acc_sub.append(sum(np.max(act, axis=1).values) / act.shape[0] * 100)

    ACT[sub] = np.array(acc_sub)

    assert len(ACT[sub]) == len(sub_LID) == len(sub_preds), (
        f'ACC ({len(ACT[sub])}), feat lengths ({len(sub_LID)})'
        f', and pred lengths ({len(sub_preds)}) not equal')
    
    axes[i_s].plot(acc_sub, label='activitiy %')
    axes[i_s].fill_between(x=np.arange(len(sub_LID)), y1=0, y2=10,
                     label='true LID binary',
                     where=sub_LID, color='orange', alpha=.5,)
    axes[i_s].set_title(sub)
    axes[i_s].set_ylabel('unilateral activitiy-%')
    axes[i_s].set_xlabel(f'{WIN_LEN_sec}s-windows')
    axes[i_s].legend()
plt.tight_layout()

figname = 'indivMovement_vs_BinaryLID'
plt.savefig(os.path.join(figpath, 'prediction', figname),
        dpi=300, facecolor='w',)

plt.close()

In [None]:
# Per subject (rows) and true dyskinesia class (columns): histogram of the
# activity percentages per window, for all windows of that class and
# split by the predicted class. All values are also collected over
# subjects in total_act_prc for the group-level figures.
clrs = list(get_colors().values())

total_act_prc = {'Dyskinesia ABSENT': {'all': [],
                                       'predicted present': [],
                                       'predicted absent': []},
                 'Dyskinesia PRESENT': {'all': [],
                                       'predicted present': [],
                                       'predicted absent': []}}

fig, axes = plt.subplots(len(FEATS.keys()), 2,
                         figsize=(8, len(FEATS.keys()) * 3))

for i_sub, sub in enumerate(FEATS.keys()):

    # subject selections are identical for both true classes
    sub_sel = sub_ids == sub
    sub_LID = y_all_binary[sub_sel]  # get TRUE binary LID
    sub_preds = preds_subs[sub]['pred']  # get PREDICTED binary labels

    for LID_BIN, LID_NAME  in enumerate(['Dyskinesia ABSENT', 'Dyskinesia PRESENT']):
        # windows of this subject with the current TRUE class
        true_lid_mask = sub_LID == LID_BIN
        act_only_TRUE_LID_sel = ACT[sub][true_lid_mask]
        
        axes[i_sub, LID_BIN].hist(act_only_TRUE_LID_sel,
                           label=f'all true {LID_NAME}',
                           color=clrs[0], alpha=.3,)
        total_act_prc[LID_NAME]['all'].extend(act_only_TRUE_LID_sel)  # store in total dict

        preds_only_TRUE_LID_sel = sub_preds[true_lid_mask]

        # True where LID was predicted
        pred_mask = preds_only_TRUE_LID_sel == 1

        # Each selection is guarded by its OWN mask: plot and store the
        # no-LID-predicted windows only if there are any, and likewise
        # for the LID-predicted windows.
        if (~pred_mask).any():

            axes[i_sub, LID_BIN].hist(act_only_TRUE_LID_sel[~pred_mask],
                            label='no-LID-predicted',
                            color=clrs[5], alpha=.5, align='left',)
            total_act_prc[LID_NAME]['predicted absent'].extend(
                act_only_TRUE_LID_sel[~pred_mask])  # store in total dict

        if pred_mask.any():

            axes[i_sub, LID_BIN].hist(act_only_TRUE_LID_sel[pred_mask],
                            label='LID-predicted',
                            color=clrs[2], alpha=.5, align='right',)
            total_act_prc[LID_NAME]['predicted present'].extend(
                act_only_TRUE_LID_sel[pred_mask])  # store in total dict
                
        axes[i_sub, LID_BIN].set_title(f'sub-{sub}: expert-rated {LID_NAME}')
        axes[i_sub, LID_BIN].set_ylabel('observations')
        axes[i_sub, LID_BIN].set_xlabel('Activity per window (%)')
        axes[i_sub, LID_BIN].legend()
    
plt.tight_layout()  


figname = 'binaryLID_pred_INDIVmovementDistribution'
plt.savefig(os.path.join(figpath, 'prediction', figname),
            dpi=300, facecolor='w',)

plt.close()

# Group-level histograms of activity per window: one panel per true
# dyskinesia class, split by predicted class (figure is not saved)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

aligns = ['left', 'right']
pred_names = ['predicted absent', 'predicted present']

for LID_BIN, LID_NAME in enumerate(['Dyskinesia ABSENT',
                                    'Dyskinesia PRESENT']):
    panel = axes[LID_BIN]
    class_values = total_act_prc[LID_NAME]

    # all windows of this true class
    panel.hist(
        class_values['all'],
        label=f'all true {LID_NAME}',
        color=clrs[0], alpha=.3,
    )

    # windows split by prediction: absent -> clrs[5], present -> clrs[2]
    for i2, PRED_NAME in enumerate(pred_names):
        panel.hist(
            class_values[PRED_NAME],
            label=PRED_NAME,
            color=clrs[5 - i2 * 3],
            alpha=.5,
            align=aligns[i2],
        )

    panel.set_title(f'expert-rated {LID_NAME}')
    panel.set_ylabel('observations')
    panel.set_xlabel('Activity per window (%)')
    panel.legend()

plt.close()

Polar plot for movement distribution during LID and true/false predictions

In [None]:


# Polar bar plots (one per true dyskinesia class): every bar is one
# window, its height the activity percentage of that window; correctly
# and falsely predicted windows are drawn in different colors.
fig, axes = plt.subplots(
    1, 2, figsize=(16, 8),
    subplot_kw={"projection": "polar"},
)

fontsize = 14
# NOTE: despite their names, `false_colors` is applied to the CORRECT
# predictions and `correct_colors` to the FALSE predictions below
false_colors = np.array(clrs)[[0, 2]]
correct_colors = np.array(clrs)[[3, 5]]

for LID_BIN, TRUE_LID_NAME in enumerate(['Dyskinesia ABSENT',
                                         'Dyskinesia PRESENT']):

    # which prediction counts as correct depends on the true class
    name_lower = TRUE_LID_NAME.lower()
    if 'present' in name_lower:
        key_correct, key_false = 'predicted present', 'predicted absent'
    elif 'absent' in name_lower:
        key_correct, key_false = 'predicted absent', 'predicted present'
    else:
        raise ValueError('no present absent found')

    preds_correct = total_act_prc[TRUE_LID_NAME][key_correct]
    preds_false = total_act_prc[TRUE_LID_NAME][key_false]

    # one bar per window, evenly spread over the circle
    n_correct = len(preds_correct)
    n_bins = n_correct + len(preds_false)

    ANGLES = np.linspace(0, 2 * np.pi, n_bins, endpoint=False) + (np.pi/2)
    WIDTH = 2 * np.pi / n_bins

    # combined values; not used further within this cell
    ACT_PRCS = list(preds_correct) + list(preds_false)

    polar_ax = axes[LID_BIN]

    # correct predictions take the first part of the circle;
    # heights are shifted by 5 to make zero-values visible
    polar_ax.bar(
        x=ANGLES[:n_correct],
        height=np.array(preds_correct) + 5,
        color=false_colors[LID_BIN], alpha=0.8,
        width=WIDTH,
        label=f'preds correct ({round(len(preds_correct)/n_bins*100)}%)',
    )
    # false predictions take the remaining part
    polar_ax.bar(
        x=ANGLES[n_correct:],
        height=np.array(preds_false) + 5,
        color=correct_colors[LID_BIN], alpha=0.8,
        width=WIDTH,
        label=f'preds false ({round(len(preds_false)/n_bins*100)} %)',
    )

    # tick labels are shifted by one tick, matching the +5 height offset
    polar_ax.set_ylim(0, 35)
    polar_ax.set_yticks(np.arange(0, 31, 5))
    polar_ax.set_yticklabels(
        [' '] + [f'{y}%' for y in np.arange(0, 26, 5)],
        fontsize=fontsize,
    )
    polar_ax.set_xticks([])
    polar_ax.set_xticklabels([], )

    polar_ax.set_title(f'{TRUE_LID_NAME} (expert-rated)',
                       fontsize=fontsize + 4, weight='bold')
    polar_ax.set_ylabel(f'Activity per {WIN_LEN_sec}s-window (%)',
                        fontsize=fontsize + 4)
    polar_ax.legend(fontsize=fontsize + 4,
                    frameon=False, ncol=2, loc='upper center',
                    bbox_to_anchor=(.5, -.05))

    print('plotted', TRUE_LID_NAME)

figname = 'binaryLID_pred_movementDistribution'
plt.savefig(
    os.path.join(figpath, 'prediction', figname),
    dpi=300, facecolor='w',
)
plt.close()