# Predict CDRS based on SSD'd Spectral Features

### 0) Load packages and functions

In [None]:
# Importing Python and external packages
import os
import sys
import importlib
import json
import csv
from dataclasses import dataclass, field, fields
from itertools import compress
import pandas as pd
import numpy as np
from itertools import product
import sklearn as sk
from scipy import signal, stats

import matplotlib.pyplot as plt



In [None]:
def get_project_path_in_notebook(
    subfolder: str = '',
):
    """
    Finds path of projectfolder from Notebook.
    Start running this once to correctly find
    other modules/functions.

    Walks upwards from the current working directory
    until a path ending in 'dyskinesia_neurophys' is found.

    Arguments:
        - subfolder: currently not used, kept so that
            existing calls stay valid

    Returns:
        - path (str): path of the project folder

    Raises:
        - FileNotFoundError: if neither the working directory
            nor any of its parents ends in 'dyskinesia_neurophys'
    """
    project_name = 'dyskinesia_neurophys'
    start_path = os.getcwd()
    path = start_path

    while not path.endswith(project_name):
        parent = os.path.dirname(path)
        # dirname() of the filesystem root returns the root itself;
        # without this check the loop would never terminate
        if parent == path:
            raise FileNotFoundError(
                f'no "{project_name}" folder found above {start_path}'
            )
        path = parent

    return path

In [None]:
# define local storage directories, all located inside the project folder
projectpath = get_project_path_in_notebook()
codepath, figpath, datapath = (
    os.path.join(projectpath, folder)
    for folder in ('code', 'figures', 'data')
)
feat_path = os.path.join(projectpath, 'results', 'features')

In [None]:
os.chdir(codepath)
# own utility functions
import utils.utils_fileManagement as utilsFiles
# own data exploration functions
import lfpecog_features.feats_read_proc_data as read_data

import lfpecog_preproc.preproc_import_scores_annotations as importClin
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_analysis.import_ephys_results as importResults


import lfpecog_analysis.load_SSD_features as load_ssdFts
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_features.feats_helper_funcs as ftHelp
from lfpecog_features.get_ssd_data import get_subject_SSDs

### 0) Define settings

In [None]:
# --- feature extraction settings ---
WIN_LEN_sec = 10
WIN_OVERLAP_part = 0.5
DATA_VERSION = 'v4.0'    # v4.0: new artef-rem, no reref; v3.0 multiple re-ref
FT_VERSION = 'v6'  # v4: broad-flanks, bursts; v3: broad-flanked SSD
INCL_PSD=['mean_psd', 'variation']
INCL_BURST = False
INCL_COH = True
IGNORE_PTS = ['011', '104', '106']
INCL_STN_ONLY_PTS = True
# Read by the plotting cells to add a '_broadbandSSD' suffix to figure names.
# NOTE(review): the value True is an assumption (other figure names get an
# unconditional '_broadbSSD' suffix) -- confirm for the used FT_VERSION
SSD_broad_flanks = True

# --- clinical score settings ---
CDRS_RATER = 'Jeroen'
ANALYSIS_SIDE = 'BILAT'
INCL_CORE_CDRS = True
CATEGORICAL_CDRS = False

In [None]:
# collect all subjects with available features for the selected data
# and feature versions; IGNORE_PTS is handed over to the helper
# (presumably to leave those subjects out -- verify in utilsFiles)
SUBS = utilsFiles.get_avail_ssd_subs(
    IGNORE_PTS=IGNORE_PTS,
    FT_VERSION=FT_VERSION,
    DATA_VERSION=DATA_VERSION,
)
print(f'SUBS: n={len(SUBS)} ({SUBS})')


- only include ECoG and ipsilateral STN LFP
- exclude moments in which dyskinesia was present only in the body side ipsilateral to the ECoG (i.e. NOT corresponding to the ECoG hemisphere)

### 1a) Prepare Neurophysiological and Clinical Data


In [None]:
importlib.reload(load_ssdFts)

# use as single ft example to debug/develop;
# versions follow the notebook settings instead of hard-coded values
sub_fts = load_ssdFts.ssdFeatures(
    sub_list=['023'],
    settings_json=f'ftExtr_spectral_{FT_VERSION}.json',
    data_version=DATA_VERSION,
)

In [None]:
# Visualize categorical CDRS conversion
# NOTE: this cell reads FEATS and FT_LABELS, which are filled by the
# "Prepare all FEATS and CDRS-LABELS" cell further down; run that cell first
importlib.reload(ftProc)
importlib.reload(load_ssdFts)

# subject to visualise (overwrites the loop variable `sub` of other cells)
sub='012'

# CDRS labels as stored in FT_LABELS, plotted over the feature index
plt.plot(FEATS[sub].index, FT_LABELS[sub], label='CDRS')

# convert the same labels with the categorical CDRS helper
mc_y = ftProc.categorical_CDRS(
    y_full_scale=FT_LABELS[sub],
    time_minutes=FEATS[sub].index,
    preLID_minutes=10,
    preLID_separate=True,
    convert_inBetween_zeros='mild',
)
# converted labels on the same axis for visual comparison
plt.plot(FEATS[sub].index, mc_y, label='categ')

plt.legend()

plt.show()


Prepare all FEATS and CDRS-LABELS

In [None]:
# LOAD FEATURE via FeatureClass containing all features
# Fills two dicts with one entry per subject:
#   FEATS[sub]:     feature dataframe (rows selected via select_bool)
#   FT_LABELS[sub]: CDRS labels selected with the same select_bool
importlib.reload(importClin)
importlib.reload(ftProc)
importlib.reload(load_ssdFts)

FEATS = {}
FT_LABELS = {}

for sub in SUBS:
    # if sub != '023': continue
    print(f'load {sub}')
    
    # LOAD MERGED NEUROPHYS FEATURES
    # PM: CORRECT BACK TO PATRICIA IN LOAD_SSD AFTER SCORING
    FEATS[sub] = ftProc.load_feature_df_for_pred(
        sub,
        INCL_PSD=INCL_PSD,
        INCL_COH=INCL_COH,
        INCL_BURSTS=INCL_BURST,
        LATERALITY=ANALYSIS_SIDE,
        EXCL_IPSI_ECOG=True,
        sel_bandwidths=['all'],
        sel_source='lfp',
        preproc_data_version=DATA_VERSION,
        settings_json=f'ftExtr_spectral_{FT_VERSION}.json',
        verbose=False,
    )

    # LOAD CLINICAL SCORES AND CORRESPONDING WINDOW SELECTION
    # returns a selector for the feature rows and the matching CDRS values
    select_bool, ecog_related_cdrs = ftProc.find_select_nearest_CDRS_for_ephys(
        sub=sub, ft_times=FEATS[sub].index,
        cdrs_rater=CDRS_RATER,
        side=ANALYSIS_SIDE, INCL_CORE_CDRS=INCL_CORE_CDRS,
    )
    # select features and clinical scores to include
    # (same selector on both, so rows and labels stay aligned)
    FEATS[sub] = FEATS[sub].iloc[select_bool]
    FT_LABELS[sub] = ecog_related_cdrs[select_bool]

    # optionally replace the labels by their categorical conversion
    if CATEGORICAL_CDRS:
        FT_LABELS[sub] = ftProc.categorical_CDRS(
            y_full_scale=FT_LABELS[sub],
            time_minutes=FEATS[sub].index,
            preLID_minutes=10,
            preLID_separate=True,
            convert_inBetween_zeros='mild',
        )
      

# print(f'features included: {FEATS[sub].keys()}') 

### 1b) Explore features vs CDRS


In [None]:
FEATS['020'].keys()

In [None]:
# features for which the per-subject correlation with the CDRS is explored
feats_incl = ['lfp_left_alpha_mean_psd',
              'lfp_left_lo_beta_mean_psd',
              'lfp_left_gamma3_mean_psd',
              'lfp_right_alpha_mean_psd',
              'lfp_right_lo_beta_mean_psd',
              'lfp_right_gamma3_mean_psd',
              'imag_coh_STN_STN_alpha',
              'imag_coh_STN_STN_lo_beta',
              'imag_coh_STN_STN_gamma3']

# per feature: list with one Pearson R per included subject
corrs = {f: [] for f in feats_incl}

for sub in FEATS:
    # skip subjects without dyskinesia: all labels are zero
    if (FT_LABELS[sub] == 0).all():
        print(f'no dyskinesia for sub-{sub}')
        continue

    for f_sel in feats_incl:
        # define ft-values and rated cdrs-scores as plain float arrays,
        # so the NaN-mask below is applied by position and not by index
        temp_values = np.asarray(FEATS[sub][f_sel], dtype=float)
        temp_labels = np.asarray(FT_LABELS[sub], dtype=float)
        # exclude windows with a NaN in either the feature or the label
        nan_sel = np.isnan(temp_values) | np.isnan(temp_labels)
        # pearsonr needs at least two samples
        if (~nan_sel).sum() < 2:
            print(f'too few valid samples for sub-{sub}, {f_sel}')
            continue
        # calculate and add corr for subject and feature
        R, _ = stats.pearsonr(temp_values[~nan_sel],
                              temp_labels[~nan_sel])
        corrs[f_sel].append(R)




In [None]:
# one box per feature, showing the spread of the per-subject correlations
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))

ax.boxplot(list(corrs.values()))
# zero-line as visual reference
ax.axhline(y=0, color='gray', alpha=.3)
ax.set_xticklabels(corrs.keys(), rotation=90)
ax.tick_params(axis='both', size=14, labelsize=14)

plt.tight_layout()
plt.show()

In [None]:
# debug output: labels of the last subject/feature processed in the
# correlation loop, restricted to the windows not flagged in nan_sel
temp_labels[~nan_sel]

### 2) Prepare prediction arrays


In [None]:
import lfpecog_predict.prepare_predict_arrays as prep_pred_arrs

In [None]:
# Create arrays per subject based on features and labels

importlib.reload(prep_pred_arrs)

# returns features, binary and full-scale labels, subject ids,
# feature times and the feature names
(X_total, y_total_binary,
 y_total_scale, sub_ids_total,
 ft_times_total, ft_names) = prep_pred_arrs.get_group_arrays_for_prediction(
    feat_dict=FEATS,
    label_dict=FT_LABELS,
    CDRS_CODING='binary',  # categorical
    CDRS_THRESHOLD=.1,
    TO_PLOT = False)

# Merge subject-arrays to one group array for prediction
(X_all, y_all_binary,
 y_all_scale, sub_ids,
 ft_times_all) = prep_pred_arrs.merge_group_arrays(X_total=X_total,
                                    y_total_binary=y_total_binary,
                                    y_total_scale=y_total_scale,
                                    sub_ids_total=sub_ids_total,
                                    ft_times_total=ft_times_total)


# sanity check: which subjects ended up in the merged arrays
print(f'Subjects included: {np.unique(sub_ids)}')

### 3) Explore feature values

In [None]:
import seaborn as sns
from lfpecog_plotting.plotHelpers import get_colors
import lfpecog_plotting.plot_SSD_feat_descriptives as plot_ssd_descr

In [None]:
import gpboost as gpb

Plot violin or boxplots for features in different CDRS scores/categories

In [None]:
### PLOT BINARY FEATURE DIFFERENCES

importlib.reload(plot_ssd_descr)

incl_ft_sources = 'ECOG'  # 'ECOG', 'ALL'
sign_test = 'glmm'

# NOTE(review): fig_path is composed here but not passed to the
# plotting call below -- confirm where the helper saves its figure
fig_path = os.path.join(figpath, 'prediction', ANALYSIS_SIDE.lower(),
                                 f'ft_version_{FT_VERSION}',
                                 f'n_is_{len(SUBS)}')

# figure name encodes feature source, significance test and sample size;
# SSD_broad_flanks has to be set before running this cell
fig_name = f'violinFeats{incl_ft_sources}_ftV4_powCoh_binaryLID_{sign_test}'
if SSD_broad_flanks: fig_name += '_broadbandSSD'
fig_name += f'_n{len(SUBS)}'

# plotting only; with these flags nothing is shown or saved
plot_ssd_descr.plot_binary_featViolins(
    X_all, y_all_scale, sub_ids, ft_names,
    incl_ft_sources = incl_ft_sources,
    SHOW_PLOT=False, SAVE_PLOT=False,
    fig_name=fig_name,
)


In [None]:
from itertools import product

In [None]:
### PLOT CATEGORICAL FEATURE DIFFERENCES

importlib.reload(plot_ssd_descr)

incl_ft_sources = 'STN'  # 'ECOG', 'ALL'
# significance test that is actually run below; the same variable is used
# for the figure names, so saved files are labelled with the test performed
sign_test = 'glmm'

# figures are stored per preprocessing data version
fig_path = os.path.join(figpath, 'ft_exploration', DATA_VERSION)

fig_name_ftBox = f'boxFeats{incl_ft_sources}_ftV4_powCoh_categLID2meanPJ_{sign_test}'
if SSD_broad_flanks: fig_name_ftBox += '_broadbandSSD'
fig_name_ftBox += f'_n{len(SUBS)}'

fig_name_corrBar = f'barCorrs{incl_ft_sources}_ftV4_powCoh_categLID_{sign_test}'
if SSD_broad_flanks: fig_name_corrBar += '_broadbandSSD'
fig_name_corrBar += f'_n{len(SUBS)}'

# only the feature boxplot is saved with the current flags
plot_ssd_descr.plot_feats_on_categLID(
    X_all, y_all_scale, sub_ids, ft_names,
    incl_ft_sources = incl_ft_sources,
    sign_test=sign_test,
    SHOW_FT_BOXPLOT=False,
    SAVE_FT_BOXPLOT=True,
    SHOW_CORR_BARPLOT=False,
    SAVE_CORR_BARPLOT=False,
    SAVE_COEF_FIGS=False,
    fig_name_ftBox=fig_name_ftBox,
    fig_name_corrBar=fig_name_corrBar,
    fig_path=fig_path,
)

### 4) Prediction

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import LeaveOneGroupOut

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# performance
from sklearn.metrics import (
    classification_report, accuracy_score,
    confusion_matrix, plot_confusion_matrix, ConfusionMatrixDisplay,
    auc, roc_curve, RocCurveDisplay
)

In [None]:
import lfpecog_predict.predict_helpers as predHelpers
import lfpecog_plotting.plot_pred_standards as plotPredStd
import lfpecog_plotting.plot_pred_standards as plotPred

Loop over predictions with different features

In [None]:
importlib.reload(ftProc)

# results are stored as dict[source][bandwidth]:
#   total_sub_*:   list with one score per subject
#   total_group_*: single score over all merged predictions
total_sub_AUC, total_sub_acc = {}, {}
total_group_AUC, total_group_acc = {}, {}

feat_sources = ['all', 'lfp', 'ecog']
feat_bwidths = ['all', 'alpha', 'lo_beta', 'hi_beta', 'gamma']

# NOTE(review): this loop overwrites X_all, y_all_binary, y_all_scale,
# sub_ids, ft_times_all and ft_names; cells below that read these names
# get the arrays of the LAST source/bandwidth combination

# LOOP OVER BANDWIDTHS AND FT-SOURCES TO INCLUDE
for source in feat_sources:
    total_sub_AUC[source], total_sub_acc[source] = {}, {}
    total_group_AUC[source], total_group_acc[source] = {}, {}
    
    for bw in feat_bwidths:
        print(f'start {source.upper()}, {bw.upper()}')
        temp_FEATS = {}
        temp_LABELS = {}
        # get specified features per subject
        for sub in SUBS:
            # LOAD MERGED NEUROPHYS FEATURES
            # NOTE(review): keyword arguments and settings-version differ
            # from the feature-loading call earlier in this notebook --
            # confirm they match the current helper signature
            temp_FEATS[sub] = ftProc.load_feature_df_for_pred(
                sub, INCL_POWER = True, INCL_COH_UNILAT = True,
                sel_bandwidths=[bw],
                sel_source=source,
                settings_json='ftExtr_spectral_v3.json',  # v3 is broadband-flanked SSD
                verbose=False,
            )
            # LOAD CLINICAL SCORES AND CORRESPONDING WINDOW SELECTION
            (
                select_bool, ecog_related_cdrs
            ) = ftProc.find_select_nearest_CDRS_for_ephys(
                sub=sub,
                ft_times=temp_FEATS[sub].index,
                cdrs_rater=CDRS_RATER,
            )
            # select features and clinical scores to include
            temp_FEATS[sub] = temp_FEATS[sub].iloc[select_bool]
            temp_LABELS[sub] = ecog_related_cdrs[select_bool]
            
            
        # Merge subject-arrays to one group array for prediction
        (X_total, y_total_binary,
        y_total_scale, sub_ids_total,
        ft_times_total, ft_names) = prep_pred_arrs.get_group_arrays_for_prediction(
            feat_dict=temp_FEATS,
            label_dict=temp_LABELS,
            CDRS_THRESHOLD=.1,
            TO_PLOT = False)

        (X_all, y_all_binary,
        y_all_scale, sub_ids,
        ft_times_all) = prep_pred_arrs.merge_group_arrays(
            X_total=X_total, y_total_binary=y_total_binary,
            y_total_scale=y_total_scale, sub_ids_total=sub_ids_total,
            ft_times_total=ft_times_total)

        # REAL PREDICTIONS returned per Subject
        preds_subs, importances_sub = predHelpers.perform_prediction(
            X=X_all.copy(),
            y=y_all_binary.copy(),
            groups=sub_ids.ravel(),
            cv_method=LeaveOneGroupOut,
            clf_method='lda',
            perform_random_perm = False,
            n_perms = 0,
            verbose = False,
            return_dict_per_sub=True
        )
        # score every subject separately
        temp_auc_scores, temp_acc_scores = [], []
        for sub in preds_subs.keys():
            
            fpr, tpr, _ = roc_curve(preds_subs[sub]['true'],
                                preds_subs[sub]['proba'][:, 1],)
            auc_score = round(auc(fpr, tpr), 3)
            acc_score = accuracy_score(preds_subs[sub]['true'],
                                    preds_subs[sub]['pred'])

            temp_auc_scores.append(auc_score)
            temp_acc_scores.append(acc_score)

        total_sub_AUC[source][bw] = temp_auc_scores
        total_sub_acc[source][bw] = temp_acc_scores

        
        # REAL PREDICT as one merged group
        y_true_all, y_pred_all, y_pred_conf_all, importances = predHelpers.perform_prediction(
            X=X_all.copy(),
            y=y_all_binary.copy(),
            groups=sub_ids.ravel(),
            cv_method=LeaveOneGroupOut,
            clf_method='lda',
            perform_random_perm = False,
            n_perms = 0,
            verbose = True,
            ft_names=ft_names
        )
        fpr, tpr, _ = roc_curve(y_true_all,
                                y_pred_conf_all)
        auc_score = round(auc(fpr, tpr), 3)
        # group accuracy is based on the merged predictions of all subjects
        # (not on the predictions of the last subject of the loop above)
        acc_score = accuracy_score(y_true_all, y_pred_all)
        total_group_AUC[source][bw] = auc_score
        total_group_acc[source][bw] = acc_score

    

In [None]:
# CHECK DIFFERENCE BETWEEN LFP AND ALL PREDICTIVE OUTCOMES

In [None]:
# Grouped bar plot of the group-level AUCs: per bandwidth-selection
# three bars (STN + ECoG, ECoG only, STN only)
clrs = list(get_colors().values())
fsize=18
fig, ax = plt.subplots(1, 1, figsize=(12, 4))

# plot sources as different bars
# plot ECoG AND STN
ax.bar(np.arange(len(feat_bwidths)) - .3,
       list(total_group_AUC['all'].values()),
       color=clrs[:len(feat_bwidths)],
       width=.25, alpha=.75, align='center',)
# for legend: gray dummy-bar below the visible y-range
ax.bar([0], [-1], color='gray', alpha=.8,
       label='STN LFP + ECoG',)

# plot only ECoG
ax.bar(np.arange(len(feat_bwidths)),
       list(total_group_AUC['ecog'].values()),
       color=clrs[:len(feat_bwidths)],
       width=.25, alpha=.75, align='center',
       hatch='*',)
ax.bar([0], [-1], color='gray', alpha=.8,
       label='ECoG only', hatch='*')
# plot only STN
ax.bar(np.arange(len(feat_bwidths)) + .3,
       list(total_group_AUC['lfp'].values()),
       color=clrs[:len(feat_bwidths)],
       width=.25, alpha=.75, align='center',
       hatch='//',)
ax.bar([0], [-1], color='gray', alpha=.8,
       label='STN LFP only', hatch='//')

ax.set_ylim(0 ,1)
ax.set_ylabel('Area Under ROC (a.u.)',
              fontsize=fsize,)
ax.set_xlabel('Feature selection',
              fontsize=fsize,)
ax.set_xticks(np.arange(len(feat_bwidths)),)
ax.set_xticklabels(feat_bwidths,
                   fontsize=fsize)

ax.legend(fontsize=fsize, ncol=3)
ax.set_title('Predictive performance: different feature'
             ' frequencies and sources',
             fontsize=fsize+4, weight='bold')
             
plt.tick_params(axis='both', size=fsize,
                labelsize=fsize)
plt.tight_layout()

figname = 'LID_binary_differentFeatSources_AUCs'
if SSD_broad_flanks: figname += '_broadbandSSD'
figname += f'_n{len(SUBS)}'
# the target folder depends on the number of subjects and may not
# exist yet; savefig() does not create missing folders itself
fig_dir = os.path.join(figpath, 'prediction', f'n_is_{len(SUBS)}')
os.makedirs(fig_dir, exist_ok=True)
plt.savefig(os.path.join(fig_dir, figname),
            facecolor='w', dpi=300,)

plt.close()

In [None]:
# print the nested result dicts of the source/bandwidth loop
# (first key: feature source, second key: bandwidth)
print(f'AUCs per subject: {total_sub_AUC}')

print(f'AUCs for group: {total_group_AUC}')

Perform single 'best' prediction

In [None]:
importlib.reload(predHelpers)

# REAL PREDICT
# uses whichever X_all / y_all_binary / sub_ids / ft_names were assigned
# last (they are also reassigned inside the source/bandwidth loop);
# cross-validation splits are defined by the subject ids
y_true_all, y_pred_all, y_pred_conf_all, importances = predHelpers.perform_prediction(
    X=X_all.copy(), y=y_all_binary.copy(), groups=sub_ids.ravel(),
    cv_method=LeaveOneGroupOut,
    clf_method='lda',
    perform_random_perm = False,
    n_perms = 0,
    verbose = True,
    ft_names=ft_names
)

plot mean feature importances

In [None]:
# Bar plot of the importances averaged over the first axis,
# sorted in ascending order
fsize = 20
fig, ax = plt.subplots(1, 1, figsize=(18, 12))

mean_importances = np.mean(importances, axis=0)
sort_idx = np.argsort(mean_importances)
temp_ftnames = plot_ssd_descr.readable_ftnames(ft_names)

ax.bar(np.arange(importances.shape[1]), mean_importances[sort_idx])

# label every bar with its readable feature name, in sorted order
ax.set_xticks(np.arange(len(ft_names)))
ax.set_xticklabels(np.array(temp_ftnames)[sort_idx],
                   rotation=60, ha='right', size=fsize)
ax.set_xlabel('')
ax.set_ylabel('importances (a.u.)', size=fsize + 8)

plt.tick_params(axis='both', size=fsize, labelsize=fsize+2)
plt.tight_layout()

fname = 'binaryLID_pred_ftImportances_ftsV4_lda'
# plt.savefig(os.path.join(figpath, 'prediction', fname),
#             facecolor='w', dpi=300,)

plt.close()

In [None]:
importlib.reload(predHelpers)

# PERMUTATIONS
# same data and classifier as the real prediction, now with 100 random
# permutations; with perm_return_ROC=True the result is unpacked into
# tpr- and fpr-collections that are iterated per permutation when plotting
perm_tpr, perm_fpr = predHelpers.perform_prediction(
    X=X_all.copy(), y=y_all_binary.copy(), groups=sub_ids.ravel(),
    cv_method=LeaveOneGroupOut,
    clf_method='lda',
    perform_random_perm=True,
    n_perms=100,
    perm_return_ROC=True,
    verbose=False,
)

Plot AUROC

In [None]:
# plot AUROC: real prediction versus permutation ROC-curves

auc_perms = []

fig, ax = plt.subplots(1,1, figsize=(6, 6))
fs = 18
# thin black line per permutation, and collect the permutation AUCs
for x_p, y_p in zip(perm_fpr, perm_tpr):
    ax.plot(x_p, y_p, alpha=.2, lw=.5, c='k',)
    auc_perms.append(auc(x_p, y_p))

# 99th percentile of permutation AUCs serves as alpha = 0.01 threshold
alpha01 = np.percentile(auc_perms, 99)
fpr, tpr, _ = roc_curve(y_true_all, y_pred_conf_all,)
auc_score = round(auc(fpr, tpr), 2)
ax.plot(fpr, tpr, c='darkgreen', lw=2,
        label=f'Prediction\n(AUC: {auc_score})',
)
# legend-entry for the permutations; the number of permutations is
# taken from the plotted curves instead of being hard-coded
ax.plot(0, 0, c='k',
        label=(f'Permutations (n={len(auc_perms)})'
               f'\n(alpha 0.01: {round(alpha01, 2)})'))
ax.plot([0, 1], [0, 1], lw=3,  c='orange', label='Chance level (50/50)')

ax.set_xlabel('False Positive Rate', fontsize=fs, weight='bold',)
ax.set_ylabel('True Positive Rate', fontsize=fs, weight='bold',)
ax.set_title('Dyskinesia Prediction - Receiver Operator Curve'
            '\nLeave-One-Subject-Out cross-validation',
            fontsize=fs)

ax.legend(frameon=False, fontsize=fs, loc='lower right')
plt.tick_params(axis='both', labelsize=fs)
plt.tight_layout()
fname = f'Group_LID_PRED_LDA_PowCoh'
fname += '_broadbSSD'
# plt.savefig(os.path.join(figpath, 'prediction', fname),
#             facecolor='w', dpi=300,)

plt.show()


In [None]:
importlib.reload(plotPred)

# Leave-One_subject-Out
# group-level metrics on the merged predictions of all subjects

# show metrics summary
print(classification_report(y_true_all, y_pred_all))

# show confusion matrix
cm = confusion_matrix(y_true_all, y_pred_all)
cm_figname = 'Group_LID_Pred_LDA_powCoh_confMatrix'
# plotPred.plot_confMatrix(cm, fig_path=figpath, fig_name=cm_figname,
#                          to_show=False, to_save=True)

# show Receiver Operator Curve
fpr, tpr, _ = roc_curve(y_true_all, y_pred_conf_all,)
auc_score = auc(fpr, tpr)
acc_score = accuracy_score(y_true_all, y_pred_all)
print(f'AUC: {round(auc_score, 3)}, Accuracy: {round(acc_score, 3)}')
# roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()


Show individual prediction course

In [None]:
importlib.reload(predHelpers)

# REAL PREDICTIONS returned per Subject
# preds_subs is indexed per subject below in this notebook, with the
# keys 'true', 'pred' and 'proba'
preds_subs, importances_sub = predHelpers.perform_prediction(
    X=X_all.copy(),
    y=y_all_binary.copy(),
    # y=y_all_scale.copy(),
    groups=sub_ids.ravel(),
    cv_method=LeaveOneGroupOut,
    clf_method='lda',
    perform_random_perm = False,
    n_perms = 0,
    verbose = False,
    return_dict_per_sub=True
)

In [None]:
import lfpecog_plotting.plot_LID_predictions as plot_preds


In [None]:
importlib.reload(plot_preds)

# figure name contains the CDRS rater
figname = f'Indiv_scaleLID_predict_lfpPows_{CDRS_RATER}'
figname += '_broadbSSD'

# target folder: <figures>/prediction/<side>/ft_version_<X>/n<subjects>
pred_fig_dir = os.path.join(figpath, 'prediction', ANALYSIS_SIDE.lower(),
                            f'ft_version_{FT_VERSION}',
                            f'n{len(SUBS)}')

# figure is shown, not saved, with the current flags
plot_preds.plot_sub_gradual_preds(
    preds_subs=preds_subs, SUBS=SUBS,
    sub_ids=sub_ids,
    ft_times_all=ft_times_all,
    PLOT_FIG=True,
    SAVE_FIG=False,
    smooth_pred_samples=10,
    fig_name=figname, fig_dir=pred_fig_dir
)

In [None]:

importlib.reload(plot_preds)

# figure name contains the CDRS rater
figname = f'Indiv_binLID_predict_v4Set2_{CDRS_RATER}'
figname += '_broadbSSD'

# target folder: <figures>/prediction/<side>/ft_version_<X>/n<subjects>
pred_fig_dir = os.path.join(figpath, 'prediction', ANALYSIS_SIDE.lower(),
                            f'ft_version_{FT_VERSION}',
                            f'n{len(SUBS)}')

# with both flags False the figure is neither shown nor saved
plot_preds.plot_sub_binary_preds(
    preds_subs=preds_subs, SUBS=SUBS,
    sub_ids=sub_ids,
    y_all_scale=y_all_scale,
    ft_times_all=ft_times_all,
    PLOT_FIG=False,
    SAVE_FIG=False,
    fig_name=figname, fig_dir=pred_fig_dir)

#### Analyse Movement percentages

In [None]:
# NOTE(review): accDerivs is not imported in any cell shown in this
# notebook -- confirm the module is imported before running this cell
importlib.reload(accDerivs)
# per subject: accelerometer data (accs) and task/movement labels (labels)
accs, labels = {}, {}
for sub in FT_LABELS.keys():
    print(f'start sub {sub}')
    # NOTE(review): dataversion is fixed to 'v3.0' here, while the
    # settings cell defines DATA_VERSION = 'v4.0' -- confirm intended
    accs[sub], labels[sub] = accDerivs.load_acc_and_task(
        sub=sub, dataversion='v3.0', resample_freq=500)

Plot movement percentages per window versus binary dyskinesia labels and predictions

In [None]:

# ACT[sub]: per feature-window the percentage of samples labeled as
# tap or movement, for the body side contralateral to the ECoG
ACT = {}

# squeeze=False + ravel keeps a 1d axes-array, also with a single subject
fig, axes = plt.subplots(len(FEATS.keys()), 1,
                         figsize=(8, len(FEATS.keys()) * 2),
                         squeeze=False)
axes = axes.ravel()

# body side is the opposite of the ECoG hemisphere
contra_side = {'right': 'left', 'left': 'right'}

for i_s, sub in enumerate(FEATS.keys()):

    sub_preds = preds_subs[sub]['pred']
    # if PLOT_PROBA: plot_probas = preds_subs[sub]['proba'][:, 1]
    # select labels and times for sub (included in prediction)
    sub_sel = sub_ids == sub
    sub_cdrs = y_all_scale[sub_sel]  # get CDRS as full scale
    sub_LID = y_all_binary[sub_sel]  # get binary LID
    sub_fttimes = ft_times_all[sub_sel]

    # get accelerometer info
    ecog_side = importClin.get_ecog_side(sub=sub)
    if ecog_side not in contra_side:
        # prevents silently re-using the body side of the previous subject
        raise ValueError(
            f'no valid ECoG side ({ecog_side}) found for sub-{sub}'
        )
    body_side = contra_side[ecog_side]

    acc_sub = []  # list to store

    for t in sub_fttimes:
        t_sec = t * 60  # convert to seconds for acc-data
        # select label-samples within the window starting at this time
        idx_sel = np.logical_and(
            labels[sub].index.values > t_sec,
            labels[sub].index.values < (t_sec + WIN_LEN_sec)
        )
        act = labels[sub][idx_sel][[f'{body_side}_tap', f'{body_side}_move']]
        # sample counts as active if tap or move is labeled
        acc_sub.append(sum(np.max(act, axis=1).values) / act.shape[0] * 100)

    ACT[sub] = np.array(acc_sub)

    assert len(ACT[sub]) == len(sub_LID) == len(sub_preds), (
        f'ACC ({len(ACT[sub])}), feat lengths ({len(sub_LID)})'
        f', and pred lengths ({len(sub_preds)}) not equal')
    
    axes[i_s].plot(acc_sub, label='activitiy %')
    # orange band marks the windows with true (binary) dyskinesia
    axes[i_s].fill_between(x=np.arange(len(sub_LID)), y1=0, y2=10,
                     label='true LID binary',
                     where=sub_LID, color='orange', alpha=.5,)
    axes[i_s].set_title(sub)
    axes[i_s].set_ylabel('unilateral activitiy-%')
    axes[i_s].set_xlabel(f'{WIN_LEN_sec}s-windows')
    axes[i_s].legend()
plt.tight_layout()

figname = 'indivMovement_vs_BinaryLID'
plt.savefig(os.path.join(figpath, 'prediction', figname),
        dpi=300, facecolor='w',)

plt.close()

In [None]:
clrs = list(get_colors().values())

# activity percentages pooled over all subjects, split on the
# expert-rated (true) label and on the predicted label
total_act_prc = {'Dyskinesia ABSENT': {'all': [],
                                       'predicted present': [],
                                       'predicted absent': []},
                 'Dyskinesia PRESENT': {'all': [],
                                       'predicted present': [],
                                       'predicted absent': []}}

# squeeze=False keeps a 2d axes-array, also with a single subject
fig, axes = plt.subplots(len(FEATS.keys()), 2,
                         figsize=(8, len(FEATS.keys()) * 3),
                         squeeze=False)

for i_sub, sub in enumerate(FEATS.keys()):

    for LID_BIN, LID_NAME  in enumerate(['Dyskinesia ABSENT', 'Dyskinesia PRESENT']):
        sub_sel = sub_ids == sub
        sub_LID = y_all_binary[sub_sel]  # get TRUE binary LID
        true_lid_mask = sub_LID == LID_BIN
        act_only_TRUE_LID_sel = ACT[sub][true_lid_mask]
        
        axes[i_sub, LID_BIN].hist(act_only_TRUE_LID_sel,
                           label=f'all true {LID_NAME}',
                           color=clrs[0], alpha=.3,)
        total_act_prc[LID_NAME]['all'].extend(act_only_TRUE_LID_sel)  # store in total dict

        sub_preds = preds_subs[sub]['pred']  # get PREDICTED binary labels
        preds_only_TRUE_LID_sel = sub_preds[true_lid_mask]

        # True for windows in which dyskinesia was predicted
        pred_mask = preds_only_TRUE_LID_sel == 1

        # windows predicted as NO dyskinesia: plot and store only if
        # such windows exist (condition matches the plotted selection)
        if (~pred_mask).any():

            axes[i_sub, LID_BIN].hist(act_only_TRUE_LID_sel[~pred_mask],
                            label=f'no-LID-predicted',
                            color=clrs[5], alpha=.5, align='left',)
            total_act_prc[LID_NAME]['predicted absent'].extend(
                act_only_TRUE_LID_sel[~pred_mask])  # store in total dict

        # windows predicted as dyskinesia
        if pred_mask.any():

            axes[i_sub, LID_BIN].hist(act_only_TRUE_LID_sel[pred_mask],
                            label=f'LID-predicted',
                            color=clrs[2], alpha=.5, align='right',)
            total_act_prc[LID_NAME]['predicted present'].extend(
                act_only_TRUE_LID_sel[pred_mask])  # store in total dict
                
        axes[i_sub, LID_BIN].set_title(f'sub-{sub}: expert-rated {LID_NAME}')
        axes[i_sub, LID_BIN].set_ylabel('observations')
        axes[i_sub, LID_BIN].set_xlabel('Activity per window (%)')
        axes[i_sub, LID_BIN].legend()
    
plt.tight_layout()  


figname = 'binaryLID_pred_INDIVmovementDistribution'
plt.savefig(os.path.join(figpath, 'prediction', figname),
            dpi=300, facecolor='w',)

plt.close()

# group-level histograms of the pooled activity percentages
# (figure is closed without being shown or saved)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
aligns = ['left', 'right']

for LID_BIN, LID_NAME  in enumerate(['Dyskinesia ABSENT', 'Dyskinesia PRESENT']):
    
    axes[LID_BIN].hist(total_act_prc[LID_NAME]['all'],
                        label=f'all true {LID_NAME}',
                        color=clrs[0], alpha=.3,)
    
    for i2, PRED_NAME in enumerate(['predicted absent', 'predicted present']):
        # same colors as in the individual plots: clrs[5] for predicted
        # absent, clrs[2] for predicted present
        axes[LID_BIN].hist(total_act_prc[LID_NAME][PRED_NAME],
                            label=PRED_NAME,
                            color=clrs[5-i2*3], alpha=.5, align=aligns[i2],)
    
    axes[LID_BIN].set_title(f'expert-rated {LID_NAME}')
    axes[LID_BIN].set_ylabel('observations')
    axes[LID_BIN].set_xlabel('Activity per window (%)')
    axes[LID_BIN].legend()

plt.close()

Polar plot for movement distribution during LID and true/false predictions

In [None]:


# Polar bar plots: one bar per window showing its activity percentage,
# left panel for rated absent, right panel for rated present dyskinesia;
# correct and false predictions are drawn in different colors
fig, axes = plt.subplots(1, 2, figsize=(16, 8),
                       subplot_kw={"projection": "polar"},
                       )

fontsize = 14
# one color per panel (index 0: absent, index 1: present)
correct_colors = np.array(clrs)[[0, 2]]
false_colors = np.array(clrs)[[3, 5]]


for LID_BIN, TRUE_LID_NAME  in enumerate(['Dyskinesia ABSENT',
                                          'Dyskinesia PRESENT']):
    
    if 'present' in TRUE_LID_NAME.lower():
        preds_correct = total_act_prc[TRUE_LID_NAME]['predicted present']
        preds_false = total_act_prc[TRUE_LID_NAME]['predicted absent']
    elif 'absent' in TRUE_LID_NAME.lower():
        preds_correct = total_act_prc[TRUE_LID_NAME]['predicted absent']
        preds_false = total_act_prc[TRUE_LID_NAME]['predicted present']
    else:
        raise ValueError('no present absent found')

    n_bins = len(preds_correct) + len(preds_false)

    # without any window the bar width and percentages cannot be
    # calculated (division by zero), so the panel is left empty
    if n_bins == 0:
        print('no windows to plot for', TRUE_LID_NAME)
        continue

    # every window gets an equal slice of the circle, starting at the top
    ANGLES = np.linspace(0, 2 * np.pi, n_bins, endpoint=False) + (np.pi/2)
    WIDTH = 2 * np.pi / n_bins

    ACT_PRCS = list(preds_correct) + list(preds_false)

    # plus 5 on the heights to show zeros as small bars
    axes[LID_BIN].bar(x=ANGLES[:len(preds_correct)],
                      height=np.array(preds_correct) + 5,
                      color=correct_colors[LID_BIN], alpha=0.8,
                      width=WIDTH,
                      label=f'preds correct ({round(len(preds_correct)/n_bins*100)}%)')
    axes[LID_BIN].bar(x=ANGLES[len(preds_correct):],
                      height=np.array(preds_false) + 5,
                      color=false_colors[LID_BIN], alpha=0.8,
                      width=WIDTH,
                      label=f'preds false ({round(len(preds_false)/n_bins*100)} %)')

    # tick labels are shifted by one tick to compensate the plus 5 above
    axes[LID_BIN].set_ylim(0, 35)
    axes[LID_BIN].set_yticks(np.arange(0, 31, 5))
    axes[LID_BIN].set_yticklabels([' '] + [f'{y}%' for y in np.arange(0, 26, 5)],
                                  fontsize=fontsize)
    axes[LID_BIN].set_xticks([])
    axes[LID_BIN].set_xticklabels([], )

    axes[LID_BIN].set_title(f'{TRUE_LID_NAME} (expert-rated)',
                            fontsize=fontsize + 4, weight='bold')
    axes[LID_BIN].set_ylabel(f'Activity per {WIN_LEN_sec}s-window (%)',
                             fontsize=fontsize + 4)
    axes[LID_BIN].legend(fontsize=fontsize + 4,
                         frameon=False, ncol=2, loc='upper center',
                         bbox_to_anchor=(.5, -.05))

    print('plotted', TRUE_LID_NAME)

figname = 'binaryLID_pred_movementDistribution'
plt.savefig(os.path.join(figpath, 'prediction', figname),
            dpi=300, facecolor='w',)
plt.close()