## Prediction with Movement dependency


Notebook to train and test prediction models that use movement-dependent and/or movement-independent data and features.

In [None]:
# Importing Python and external packages
import os
import sys
import importlib
import json
import csv
from dataclasses import dataclass, field, fields
from itertools import compress, product
import pandas as pd
import numpy as np
import sklearn as sk
from scipy import signal, stats

import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
def get_project_path_in_notebook(
    subfolder: str = '',
):
    """
    Finds path of projectfolder from Notebook.
    Start running this once to correctly find
    other modules/functions.

    Walks up from the current working directory until
    a path ending in 'dyskinesia_neurophys' is found.

    Arguments:
        - subfolder: currently not used, kept so that
            existing calls passing it keep working

    Returns:
        - path (str): path ending in 'dyskinesia_neurophys'

    Raises:
        - FileNotFoundError: if the working directory is not
            located inside the project folder
    """
    project_folder = 'dyskinesia_neurophys'
    path = os.getcwd()

    while not path.endswith(project_folder):
        parent = os.path.dirname(path)
        # at the filesystem root os.path.dirname() returns its input,
        # without this check the loop would never terminate
        if parent == path:
            raise FileNotFoundError(
                f'"{project_folder}" not found in any parent '
                f'folder of {os.getcwd()}'
            )
        path = parent

    return path

In [None]:
# resolve the project root once; the next cell builds the code path from it
projectpath = get_project_path_in_notebook()


Import own functions

In [None]:
# NOTE: changes the working directory of the whole kernel to <project>/code,
# presumably so that the project-local packages imported below resolve
os.chdir(os.path.join(projectpath, 'code'))

# own utility functions
import utils.utils_fileManagement as utilsFiles

# own data exploration functions
import lfpecog_features.feats_read_proc_data as read_data
import lfpecog_preproc.preproc_import_scores_annotations as importClin
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_analysis.import_ephys_results as importResults
import lfpecog_analysis.stats_fts_lid_corrs as ftLidCorr
import lfpecog_analysis.psd_lid_stats as lidStats
import lfpecog_analysis.load_SSD_features as load_ssdFts
import lfpecog_analysis.ft_processing_helpers as ftProc
import lfpecog_features.feats_helper_funcs as ftHelp
import lfpecog_features.feats_spectral_helpers as specHelp
from lfpecog_features.get_ssd_data import get_subject_SSDs

import lfpecog_predict.prepare_predict_arrays as predArrays
import lfpecog_features.extract_ssd_features as ssdFeats

from lfpecog_plotting.plotHelpers import get_colors
import lfpecog_plotting.plotHelpers as pltHelp
import lfpecog_plotting.plot_FreqCorr as plotFtCorrs
import lfpecog_plotting.plot_SSD_feat_descriptives as plot_ssd_descr

Check package versions

In [None]:
from  matplotlib import __version__ as plt_version

# check some package versions for documentation and reproducibility
print('Python sys', sys.version)
print('pandas', pd.__version__)
print('numpy', np.__version__)
print('matplotlib', plt_version)
# versions noted by the author:
# Python sys 3.9.0 (default, Nov 15 2020, 08:30:55) [MSC v.1916 64 bit (AMD64)]
# pandas 1.4.4
# numpy 1.23.3
# matplotlib 3.5.3

### Import data

Imports the SSD band envelopes together with the parallel CDRS scores, timestamps, task labels and movement coding.

In [None]:
# SET VERSIONS

# feature version: selects the settings loaded below and
# is passed on to the subject selection
FT_VERSION='v6'
SETTINGS = utilsFiles.load_ft_ext_cfg(FT_VERSION=FT_VERSION)

# subjects for this data / feature version (presumably those
# with SSD data available -- confirm in utilsFiles)
SUBS = utilsFiles.get_avail_ssd_subs(
    DATA_VERSION=SETTINGS["DATA_VERSION"],
    FT_VERSION=FT_VERSION,
)

In [None]:
# reload to pick up edits in the module without restarting the kernel
importlib.reload(predArrays)

# get move-selected env arrays
# contains: freq-bands, CDRS, timestamps, tasks, mov-coding
# DATA[sub] and env_fbands[sub] are indexed per source below
# (e.g. 'lfp_left'); env_fbands holds the freq-band names
DATA, env_fbands = {}, {}

# NOTE: the loop variable `sub` stays defined after this cell and is
# read again by the ratio-biomarker cell further down
for sub in SUBS:
    DATA[sub], env_fbands[sub] = predArrays.get_move_selected_env_arrays(
        sub=sub, LOAD_SAVE=True
    )

In [None]:
def get_task_minutes(DATA):
    """
    Collects per subject the unique rounded minutes and
    the task value at the first sample of every minute.

    Only the 'lfp_left' array of a subject is used: the
    third-to-last row divided by 60 and rounded gives the
    minutes (presumably timestamps in seconds), the
    second-to-last row gives the task values.

    Arguments:
        - DATA: dict with per subject a dict containing
            the 2d array 'lfp_left'

    Returns:
        - task_minutes: dict with per subject the list
            [unique minutes, task at first sample per minute]
    """
    def _minutes_and_tasks(src_arr):
        # rows are addressed from the end of the array
        rounded_min = np.around(src_arr[-3] / 60)
        minutes, first_idx = np.unique(rounded_min, return_index=True)

        return [minutes, src_arr[-2][first_idx]]

    return {sub_id: _minutes_and_tasks(sub_data['lfp_left'])
            for sub_id, sub_data in DATA.items()}

In [None]:
# per subject: [unique rounded minutes, task per minute]; passed as
# `task_minutes` to scatter_Feats_LID_MOVE() further down
TASK_MINS = get_task_minutes(DATA)

### Plot Powers: Activity vs. Dyskinesia

In [None]:
# settings for the feature/label class WITHOUT ecog (INCL_ECOG=False);
# a second cell below builds the same class with INCL_ECOG=True
FT_VERSION = 'v6'
INCL_CORE_CDRS = True  # not passed to FeatLidClass in this cell
CATEG_CDRS = False
MILD_CDRS = 4
SEV_CDRS = 8
INCL_ECOG = False

FeatLid_STN = ftProc.FeatLidClass(
    FT_VERSION=FT_VERSION,
    CDRS_RATER='Patricia',
    INCL_ECOG=INCL_ECOG,
    INCL_ACC_RMS=True,
    CATEGORICAL_CDRS=CATEG_CDRS,
    CORR_TARGET='CDRS',
    cutMild=MILD_CDRS, cutSevere=SEV_CDRS,
    TO_CALC_CORR=False,
)

In [None]:
def add_mean_gamma_column(df):
    """
    Adds 'gammaMean' columns to df, in place.

    For every column with 'gamma1' in its name, the
    element-wise mean of the matching gamma1, gamma2 and
    gamma3 columns is stored in a new column in which
    'gamma1' is replaced by 'gammaMean'. Existing
    'gammaMean' columns are left untouched.

    Arguments:
        - df: DataFrame containing the gamma1, gamma2
            and gamma3 columns

    Returns nothing, df itself is extended.
    """
    sub_bands = ('gamma1', 'gamma2', 'gamma3')
    # list is fixed before columns are added to df
    first_band_cols = [col for col in df.keys() if 'gamma1' in col]

    for g1_col in first_band_cols:
        mean_col = g1_col.replace('gamma1', 'gammaMean')
        if mean_col in df.keys():
            continue

        band_values = [df[g1_col.replace('gamma1', band)].values
                       for band in sub_bands]
        df[mean_col] = np.mean(band_values, axis=0)
            


In [None]:
def get_peakish_gamma_part(
    sub, src: str = 'lfp', peak_sel_state = 'dysk',
):
    """
    Returns the gamma sub-band key ('gamma1', 'gamma2' or
    'gamma3') that contains the subject's narrow-gamma peak.

    The peak is read from specHelp.get_indiv_band_peaks(),
    row '(<sub>): <peak_sel_state>', column 'narrow_gamma';
    if that value is NaN, the row '(<sub>): all' is used.

    Arguments:
        - sub: subject code as used in the peak table index
        - src: passed as SRC to get_indiv_band_peaks
        - peak_sel_state: all / rest / dysk / tap

    Returns:
        - gamma_key: 'gamma1' for peaks in [60, 70),
            'gamma2' for [70, 80), 'gamma3' for [80, 90)

    Raises:
        - ValueError: if the peak is NaN or outside [60, 90)
    """
    # all / rest / dysk /tap
    peak_df = specHelp.get_indiv_band_peaks(SRC=src,)
    gamma_peak_f = peak_df.loc[f'({sub}): {peak_sel_state}']['narrow_gamma']
    if np.isnan(gamma_peak_f):
        gamma_peak_f = peak_df.loc[f'({sub}): all']['narrow_gamma']

    gamma_bands = (('gamma1', 60, 70),
                   ('gamma2', 70, 80),
                   ('gamma3', 80, 90))

    for gamma_key, f_low, f_high in gamma_bands:
        if f_low <= gamma_peak_f < f_high:
            return gamma_key

    # NaN fails every comparison above and ends up here as well
    raise ValueError(
        f'no gamma sub-band for sub {sub} ({src}): '
        f'narrow_gamma peak is {gamma_peak_f}, expected 60 <= peak < 90'
    )

In [None]:
# same settings as used for FeatLid_STN above, except INCL_ECOG=True
# NOTE: this cell overwrites the module-level setting variables
FT_VERSION = 'v6'
INCL_CORE_CDRS = True  # not passed to FeatLidClass in this cell
CATEG_CDRS = False
MILD_CDRS = 4
SEV_CDRS = 8
INCL_ECOG = True

FeatLid_ECOG = ftProc.FeatLidClass(
    FT_VERSION=FT_VERSION,
    CDRS_RATER='Patricia',
    INCL_ECOG=INCL_ECOG,
    INCL_ACC_RMS=True,
    CATEGORICAL_CDRS=CATEG_CDRS,
    CORR_TARGET='CDRS',
    cutMild=MILD_CDRS, cutSevere=SEV_CDRS,
    TO_CALC_CORR=False,
)

In [None]:
def get_plot_jitter(x_temp, y_temp, jit_width=.5, ZERO_SPACE=True):
    """
    Draws uniform random jitter for scatter x- and y-values.

    if ZERO_SPACE: x_temp 0's are shifted to -1's IN PLACE
    (the caller's array is changed) and the x-jitter of
    these samples is drawn with the doubled width.

    Arguments:
        - x_temp: x-values, changed in place if ZERO_SPACE
        - y_temp: y-values, only the length is used
        - jit_width: jitter is drawn from [-jit_width, jit_width)
        - ZERO_SPACE: shift zeros to -1 and widen their jitter

    Returns:
        - x_jitter, y_jitter: arrays with the lengths of
            x_temp and y_temp, to be added by the caller
    """
    n_x, n_y = len(x_temp), len(y_temp)

    x_jitter = np.random.uniform(low=-jit_width, high=jit_width, size=n_x)

    if ZERO_SPACE:
        # samples without dyskinesia (x == 0) get their own base at -1
        zero_mask = x_temp == 0
        x_temp[zero_mask] -= 1
        wide = jit_width * 2
        x_jitter[zero_mask] = np.random.uniform(
            low=-wide, high=wide, size=np.count_nonzero(zero_mask),
        )

    # drawn last to keep the order of random draws
    y_jitter = np.random.uniform(low=-jit_width, high=jit_width, size=n_y)

    return x_jitter, y_jitter


In [None]:
def _zscore_to_baseline(values, baseline_sel):
    """
    Z-scores values against the mean and SD of the
    samples selected by the boolean baseline_sel.
    """
    base_m = np.mean(values[baseline_sel])
    base_sd = np.std(values[baseline_sel])

    return (values - base_m) / base_sd


def scatter_Feats_LID_MOVE(
    FeatClass,
    SAVE_FIG = True,
    SRC: str = 'lfp',  # lfp or ecog
    FIG_NAME = '00_featScatter_LID_MOVE',
    power_feats = {'ecog': ['ecog_delta_mean_psd',
                           'ecog_lo_beta_mean_psd',
                           'ecog_gammaMean_mean_psd'],
                    'lfp': [['lfp_left_delta_mean_psd',
                             'lfp_right_delta_mean_psd'],
                            ['lfp_left_lo_beta_mean_psd',
                             'lfp_right_lo_beta_mean_psd'],
                            ['lfp_left_gammaMean_mean_psd',
                             'lfp_right_gammaMean_mean_psd']]},
    pow_labels = ['THETA power', 'BETA power', 'GAMMA power'],
    gamma_mean_or_peakband: str = 'mean',
    shareX = True,
    ZERO_SPACE: bool = True,
    EXCL_FREE: bool = True,
    task_minutes = None,
):
    """
    Plots one scatter panel per power feature: labels from
    FeatClass.FT_LABELS (x) versus FeatClass.ACC_RMS (y),
    colour-coded with the individually z-scored power.

    Power is z-scored per subject and feature column against
    the samples with label 0 and index-time < 5. For lfp, both
    hemispheres are added as separate points.

    Arguments:
        - FeatClass: class with the dicts FEATS (DataFrames),
            FT_LABELS and ACC_RMS, all per subject
        - SAVE_FIG: save and close the figure, otherwise show
        - SRC: 'lfp' or 'ecog', key of power_feats
        - FIG_NAME: file name of the saved figure
        - power_feats: per source the features to plot; one
            column name per ecog feature, a [left, right]
            list per lfp feature (defaults are only read)
        - pow_labels: label per feature, first word is used
            in the colorbar label
        - gamma_mean_or_peakband: 'mean' uses the mean of
            gamma1-3 (see add_mean_gamma_column), any other
            value ('peakband') uses the sub-band from
            get_peakish_gamma_part
        - shareX: share x-axis, x-label only on bottom panel
        - ZERO_SPACE: plot samples with label 0 around x=-1
        - EXCL_FREE: only keep samples whose rounded index
            time is in task_minutes[sub][0]
        - task_minutes: result of get_task_minutes, required
            if EXCL_FREE is True

    Raises:
        - ValueError: if EXCL_FREE is True without task_minutes
    """
    if EXCL_FREE and task_minutes is None:
        raise ValueError(
            'task_minutes (from get_task_minutes) is '
            'required if EXCL_FREE is True'
        )

    feats_to_plot = power_feats[SRC]
    n_feats = len(feats_to_plot)

    fig, axes = plt.subplots(n_feats, 1,
                             figsize=(6, 3 * n_feats),
                             sharex='col' if shareX else 'none',)
    # plt.subplots returns a single Axes (no array) for one row
    axes = np.atleast_1d(axes)

    np.random.seed(27)
    fsize=14

    ylab = rf"$\bfMovement$" + " " + rf"$\bfpresence$" + "\n(acc, z-scored RMS)"
    xlab = rf"$\bfDyskinesia$" + " " + rf"$\bfseverity$" + "\n(CDRS, sum)"

    for i_ft, pow_ft in enumerate(feats_to_plot):
        x_values, y_values, pow_values = [], [], []
        # ecog features are single strings, lfp features are lists with
        # left and right; a substring check on the list itself would
        # test list membership and never find 'gamma'
        ft_names = [pow_ft] if isinstance(pow_ft, str) else list(pow_ft)
        is_gamma = any('gamma' in name for name in ft_names)

        for sub in FeatClass.FT_LABELS.keys():
            # add mean gamma column or change gamma bandkey
            sub_ft_names = ft_names
            if is_gamma and gamma_mean_or_peakband == 'mean':
                add_mean_gamma_column(FeatClass.FEATS[sub])
            elif is_gamma:
                gamma_peak_key = get_peakish_gamma_part(sub, src=SRC,)
                sub_ft_names = [name.replace('gammaMean', gamma_peak_key)
                                for name in ft_names]
            # correct for ecog side
            if SRC == 'ecog':
                ecog_side = importClin.get_ecog_side(sub)
                sub_ft_names = [name.replace('ecog', f'ecog_{ecog_side}')
                                for name in sub_ft_names]

            # get data to plot
            x_temp = FeatClass.FT_LABELS[sub].copy()
            y_temp = FeatClass.ACC_RMS[sub].copy()
            times = FeatClass.FEATS[sub].index.values.copy()
            scores = FeatClass.FT_LABELS[sub].copy()
            # exclude FREE
            if EXCL_FREE:
                task_sel = np.isin(np.around(times), task_minutes[sub][0])
                x_temp = x_temp[task_sel]
                y_temp = y_temp[task_sel]
                times = times[task_sel]
                scores = scores[task_sel]

            base_sel = np.logical_and(scores == 0, times < 5)

            # one column for ecog, left and right stn for lfp
            for ft_temp in sub_ft_names:
                pow_temp = FeatClass.FEATS[sub][ft_temp]
                if EXCL_FREE: pow_temp = pow_temp[task_sel]
                # indiv zscore powers
                pow_temp = _zscore_to_baseline(pow_temp, base_sel)
                # get_plot_jitter shifts the zeros of its x-input in place,
                # a fresh copy per column gives every hemisphere the same
                # shift and jitter
                x_side = x_temp.copy()
                x_jitter, y_jitter = get_plot_jitter(
                    x_side, y_temp, ZERO_SPACE=ZERO_SPACE,
                )
                # add to overall lists
                x_values.extend(x_side + x_jitter)
                y_values.extend(y_temp + y_jitter)
                pow_values.extend(pow_temp)

        # scatter plot data
        if SRC == 'lfp': a = .3
        else: a = .5
        axes[i_ft].scatter(x_values, y_values, c=pow_values,
                           alpha=a, cmap='viridis', vmin=-3, vmax=3,)
        cbar = axes[i_ft].scatter([], [], c=[], cmap='viridis', vmin=-3, vmax=3,)  # colorbar without transparency
        cbar_lab = rf"$\bf{pow_labels[i_ft].split(' ')[0]}$" + " " + rf"$\bfPower$" + "\n(indiv. z-score)"

        plt.colorbar(cbar, ax=axes[i_ft], label=cbar_lab,)

        # plot meta info
        # x-label on every panel, or only on the bottom one if shared
        if not shareX or i_ft == (n_feats - 1):
            axes[i_ft].set_xlabel(xlab, size=fsize,)
        axes[i_ft].set_ylabel(ylab, size=fsize,)
        axes[i_ft].set_ylim(-2, 4)

        xticks = np.arange(0, 15, 2)
        xticklabels = np.arange(0, 15, 2)
        if ZERO_SPACE: xticks[0] = -1
        # positions first, then labels; text properties are only
        # accepted together with labels
        axes[i_ft].set_xticks(xticks)
        axes[i_ft].set_xticklabels(xticklabels, size=fsize,)

        axes[i_ft].axvline(x=.3, lw=5, color='lightgray',
                           alpha=.7,)

    if SRC == 'lfp': src = 'Subthalamic'
    else: src = 'Cortical'
    fig.suptitle(f'{src} Features:\n'
                 'Movement and Dyskinesia dependence',
                 size=fsize+4, weight='bold',)

    fig.tight_layout(pad=.5,)

    if SAVE_FIG:
        FIG_PATH = os.path.join(
            utilsFiles.get_project_path('figures'),
            'final_Q1_2024',
            'feat_scatter_LID_MOVE',
        )
        os.makedirs(FIG_PATH, exist_ok=True)
        fig.savefig(os.path.join(FIG_PATH, FIG_NAME),
                    dpi=300, facecolor='w',)
        print(f'saved plot {FIG_NAME} in {FIG_PATH}!')
        plt.close(fig)
    else: plt.show()



In [None]:
# choose the source to plot: 'lfp' or 'ecog'
SRC = 'ecog'

# pick the feature class that matches the source
if SRC == 'lfp': FeatClass = FeatLid_STN  # FeatLid_ECOG
if SRC == 'ecog': FeatClass = FeatLid_ECOG

# file name contains source and number of subjects
FIG_NAME = (f'woFREE_featScatter_{SRC.upper()}_LID_MOVE_gPeakBand'
            f'_n{len(FeatClass.FEATS.keys())}')

# NOTE: SAVE_FIG=True writes the figure to disk and closes it,
# nothing is shown in the notebook
scatter_Feats_LID_MOVE(FeatClass=FeatClass,
                       SRC=SRC,
                       SAVE_FIG=True,
                       FIG_NAME=FIG_NAME,
                       gamma_mean_or_peakband='peakband',
                       EXCL_FREE=True,
                       task_minutes=TASK_MINS,)

### Visualise ratio biomarker


Calculated from the envelope arrays returned by `predArrays.get_move_selected_env_arrays()`.

In [None]:
src = 'lfp_left'

ex_sub = list(env_fbands.keys())[0]
ex_src = list(env_fbands[ex_sub].keys())[0]

# row indices within the DATA arrays: freq-bands first, then CDRS,
# time and task (move is last). The band names are taken from an
# explicit example subject instead of a loop variable left behind
# by an earlier cell; the indices are used for all subjects/sources
band_names = list(env_fbands[ex_sub][src])
i_theta = band_names.index('theta')
i_beta = band_names.index('lo_beta')
i_gammaPeak = band_names.index('gammaPeak')
i_cdrs = len(band_names)
i_time = i_cdrs + 1
i_task = i_time + 1
# i_move is last

time_list = {'lid': [], 'nolid': []}
ratio_list = {'lid': [], 'nolid': []}

lid_onsets = []

for sub, src in product(DATA.keys(),
                        ['lfp_left', 'lfp_right']):

    # same criterion (any CDRS > 0) as the onset search below, so a
    # hemisphere in the 'lid' group always has an onset sample
    if np.any(DATA[sub][src][i_cdrs, :] > 0):
        subgroup = 'lid'
    else:
        print(f'sub {sub} had no LID')
        subgroup = 'nolid'

    print(f'...calc {sub, src}')

    # sort all rows on time
    idx_sort = np.argsort(DATA[sub][src][i_time, :])
    sort_arr = DATA[sub][src][:, idx_sort]

    theta = sort_arr[i_theta, :]
    beta = sort_arr[i_beta, :]
    gamma = sort_arr[i_gammaPeak, :]

    # NOTE(review): this loop has no effect on theta, beta and gamma,
    # it only rebinds the loop variable `arr`; the ratio below is
    # therefore calculated on the non-z-scored envelopes. Left
    # unchanged to keep the resulting ratios identical. Before
    # enabling it: confirm the intended baseline, `< 5` is compared
    # with time values that are divided by 60 elsewhere to get minutes
    for arr in [theta, beta, gamma]:
        off_sel = sort_arr[i_time, :] < 5
        m = np.mean(arr[off_sel])
        sd = np.std(arr[off_sel])
        arr = (arr - m) / sd

    assert theta.shape == beta.shape, 'shapes unequal'

    if subgroup == 'lid':
        # express time relative to first sample with CDRS > 0
        i0_lid = np.where(sort_arr[i_cdrs, :] > 0)[0][0]
        t0_lid = sort_arr[i_time, i0_lid]
        lid_times = sort_arr[i_time, :] - t0_lid
        lid_onsets.append(t0_lid)
    else:
        lid_times = sort_arr[i_time, :]

    time_list[subgroup].append(lid_times)
    # ratio is z-scored over the full recording of the hemisphere
    ratio = (theta / beta) * gamma
    ratio = (ratio - np.mean(ratio)) / np.std(ratio)
    ratio_list[subgroup].append(ratio)


In [None]:
# time range (relative to LID onset) covered by the LID group;
# NOTE: min_sec, max_sec and nolid_new_times are calculated
# again in the next cell
min_sec, max_sec = (
    int(np.min([np.min(l) for l in time_list['lid']])),
    int(np.max([np.max(l) for l in time_list['lid']]))
)

# onsets are divided by 60 to report minutes
print(f'mean LID onset: {round(np.mean(lid_onsets) / 60, 1)} minutes'
      f' (sd: {round(np.std(lid_onsets) / 60, 1)})')
# correct group without LID to comparable time offsets
nolid_new_times = []
for t in time_list['nolid']:
    nolid_new_times.append(t - np.mean(lid_onsets))


In [None]:
# create overall array per X seconds
WIN_LEN = 10

## LID group
min_sec, max_sec = (
    int(np.min([np.min(l) for l in time_list['lid']])),
    int(np.max([np.max(l) for l in time_list['lid']]))
)
t_new = np.arange(min_sec, max_sec, WIN_LEN)

ratio_arr = np.array([[np.nan] * len(t_new)] * len(ratio_list['lid']))

for i_t, t0 in enumerate(t_new):

    for i_row, (sig_temp, t_temp) in enumerate(
        zip(ratio_list['lid'], time_list['lid'])
    ):
        # select idx for window
        win_sel = np.logical_and(t_temp > t0, t_temp < (t0 + WIN_LEN))
        # add mean ratio to correct idx
        ratio_arr[i_row, i_t] = np.mean(sig_temp[win_sel])


## NO-LID group

# correct group without LID to comparable time offsets
nolid_new_times = []
for t in time_list['nolid']:
    nolid_new_times.append(t - np.mean(lid_onsets))

ratio_arr_noLID = np.array([[np.nan] * len(t_new)] * len(ratio_list['nolid']))

# use same time frame
for i_t, t0 in enumerate(t_new):

    for i_row, (sig_temp, t_temp) in enumerate(
        zip(ratio_list['nolid'], nolid_new_times)
    ):
        # select idx for window
        win_sel = np.logical_and(t_temp > t0, t_temp < (t0 + WIN_LEN))
        # add mean ratio to correct idx
        ratio_arr_noLID[i_row, i_t] = np.mean(sig_temp[win_sel])
        

In [None]:
from scipy.signal import savgol_filter

In [None]:
def plot_ratio_biomarker(
    ratio_arr, Z_SCORE_RATIOS = True,
    MIN_SUBS = 6, fsize = 14,
    SMOOTH_WIN : int = 0,
    SAVE_FIG: bool = False,
    t_windows = None,
):
    """
    Plots the mean ratio over time with the standard error
    of the mean as shaded area.

    Arguments:
        - ratio_arr: array [n rows, n windows], NaN where a
            row has no data in a window
        - Z_SCORE_RATIOS: z-score every row (NaNs ignored)
            before averaging
        - MIN_SUBS: windows are only plotted if at least this
            many subjects contribute; subjects are counted as
            non-NaN rows / 2 (assumes two rows per subject)
        - fsize: fontsize
        - SMOOTH_WIN: if > 0, window_length of the Savitzky-
            Golay filter (polyorder 3) applied to the mean
        - SAVE_FIG: save and close the figure, otherwise show
        - t_windows: window start times in seconds, one per
            column of ratio_arr; defaults to the notebook
            variable t_new

    Raises:
        - ValueError: if the number of window times and the
            number of columns in ratio_arr differ
    """
    if t_windows is None:
        # fall back on the window starts defined in the notebook
        t_windows = t_new
    t_windows = np.asarray(t_windows)

    if len(t_windows) != ratio_arr.shape[1]:
        raise ValueError(
            f'{len(t_windows)} window times given for '
            f'{ratio_arr.shape[1]} columns in ratio_arr'
        )

    # subjects contributing to mean, per window
    n_hemisf_present = np.sum(~np.isnan(ratio_arr), axis=0)
    n_subs_wins = n_hemisf_present / 2

    # use original or zscored arr
    if Z_SCORE_RATIOS:
        plot_arr = np.array([(row - np.nanmean(row)) / np.nanstd(row)
                             for row in ratio_arr])
    else:
        plot_arr = ratio_arr

    ratio_error = np.nanstd(plot_arr, axis=0) / np.sqrt(n_hemisf_present)
    ratio_mean = np.nanmean(plot_arr, axis=0)

    fig, ax = plt.subplots(1,1, figsize=(8, 4))

    win_sel = n_subs_wins >= MIN_SUBS
    x = t_windows[win_sel] / 60  # seconds to minutes
    y = ratio_mean[win_sel]
    var = ratio_error[win_sel]

    if SMOOTH_WIN > 0:
        # only the mean is smoothed, the error band is not
        y = savgol_filter(y, window_length=SMOOTH_WIN, polyorder=3,)

    ax.plot(x, y)
    ax.fill_between(x, y1=y - var, y2=y + var, alpha=.5,)
    ax.axvline(x=0, color='gray', alpha=.5, lw=3, ls='--',)

    if Z_SCORE_RATIOS: r = 'Z-scored Ratio'
    else: r = 'Ratio'
    ax.set_ylabel(f'STN Theta * Gamma / Beta {r}',  #\n(4-8 / 12-20 Hz)',
            size=fsize, weight='bold',)
    ax.set_xlabel('Time vs Dyskinesia Onset (minutes)',
            size=fsize, weight='bold',)

    ax.tick_params(axis='both', size=fsize, labelsize=fsize,)

    # horizontal reference lines
    if Z_SCORE_RATIOS:
        for y_line in [-2, -1, 0, 1, 2, 3]:
            ax.axhline(y=y_line, color='gray', alpha=.3, lw=.5,)
        ax.set_ylim(-2, 2)
    else:
        for y_line in [-.1, 0, 0.1]:
            ax.axhline(y=y_line, color='gray', alpha=.3, lw=.5,)
        ax.set_ylim(-.2, .2)
        ax.axhline(y=0, color='gray', alpha=.5, lw=1,)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    fig.tight_layout()

    if SAVE_FIG:
        fig_dir = os.path.join(utilsFiles.get_project_path('figures'),
                               'final_Q1_2024',
                               'prediction',)
        os.makedirs(fig_dir, exist_ok=True)
        fig.savefig(os.path.join(
                        fig_dir,
                        f'theta_beta_ratio_v1_10sec_min{MIN_SUBS}subs_smooth{SMOOTH_WIN}'),
                    dpi=300, facecolor='w',)
        plt.close(fig)
    else:
        plt.show()

In [None]:
# alternative call, without smoothing:
# plot_ratio_biomarker(ratio_arr=ratio_arr,
#                      Z_SCORE_RATIOS=False,
#                      MIN_SUBS=5,
#                      SMOOTH_WIN=0,)

# LID group, non-z-scored ratios, smoothed; saved to file (not shown)
# NOTE(review): SMOOTH_WIN is passed as window_length to savgol_filter,
# check whether the installed scipy accepts an even window_length
plot_ratio_biomarker(ratio_arr=ratio_arr,
                     Z_SCORE_RATIOS=False,
                     MIN_SUBS=5,
                     SMOOTH_WIN=10,
                     SAVE_FIG=True,)

# alternative call, group without LID:
# plot_ratio_biomarker(ratio_arr=ratio_arr_noLID,
#                      Z_SCORE_RATIOS=False, MIN_SUBS=2)

Extract Spectral Power and Variation in Feature windows

TODO:
- gamma currently uses only the individual peak band; add the summed gamma power over 60 - 90 Hz

In [None]:
def indiv_zscoring_feats(X_arr, sub_arr):
    """
    Z-scores every feature (column) per subject.

    X_arr is changed IN PLACE, the same array is returned.

    Arguments:
        - X_arr: array [n samples, n features]
        - sub_arr: subject per sample, length n samples

    Returns:
        - X_arr: the input array, with every feature
            z-scored within every subject
    """
    n_feats = X_arr.shape[1]

    for sub_id in np.unique(sub_arr):
        rows = sub_arr == sub_id

        for col in range(n_feats):
            # mean and sd of one feature within one subject
            vals = X_arr[rows, col]
            X_arr[rows, col] = (vals - np.mean(vals)) / np.std(vals)

    return X_arr

In [None]:
# reload to pick up edits in the module without restarting the kernel
importlib.reload(ssdFeats)

# dicts are filled per movement selection: key 0 = 'INDEP', key 1 = 'DEPEND'
X_arrs, y_arrs, sub_arrs = {}, {}, {}
mov_dep_code = {}

for i_mov, MOV_SEL in enumerate(['INDEP', 'DEPEND']):
    # loading/ creating/ saving in ssdFeats script
    # (feat_names is overwritten in every iteration)
    (
        X_arrs[i_mov], y_arrs[i_mov], sub_arrs[i_mov], feat_names
    ) = ssdFeats.get_moveSpec_predArrays(
        MOV_SEL=MOV_SEL, LOAD_SOURCES=['STN',],
        POWER_METHOD='ENV'
    )
    # add movement code: one code (0 or 1) per row of the STN array
    mov_dep_code[i_mov] = np.array([i_mov] * X_arrs[i_mov]['STN'].shape[0]).T

# merging for zscoring together
stn_X = np.concatenate([X_arrs[0]['STN'], X_arrs[1]['STN']], axis=0)
stn_y = np.concatenate([y_arrs[0]['STN'], y_arrs[1]['STN']], axis=0)
stn_subids = np.concatenate([sub_arrs[0]['STN'], sub_arrs[1]['STN']], axis=0)

# NOTE: mov_dep_code changes from a dict into one array here
mov_dep_code = np.concatenate([mov_dep_code[0], mov_dep_code[1]], axis=0)

In [None]:
# calculate coefficients and pvalues
CDRS_categs = {0: 'none', 1: 'mild',
               2: 'moderate', 3: 'severe'}

X = stn_X.copy()
y = stn_y.copy()
sub_ids = stn_subids.copy()

stat_arr = {'INDEP': {'coef': [], 'pval': []},
            'DEPEND': {'coef': [], 'pval': []}}

X = indiv_zscoring_feats(X, sub_arr=sub_ids)

for i_ft, ft in enumerate(feat_names['STN']):
    

    for i_mov, MOV_SEL in enumerate(['INDEP', 'DEPEND']):
        mov_bool = mov_dep_code == i_mov
        # define X and groups for feat
        ft_temp = X[mov_bool, i_ft]
        y_temp = y[mov_bool]
        box_categs = [ft_temp[y_temp == cat]
                      for cat in CDRS_categs.keys()]
        # run LMM
        coeff, pval = lidStats.run_mixEff_wGroups(
            dep_var=ft_temp,
            indep_var=y_temp,
            groups=sub_ids[mov_bool],
            TO_ZSCORE=False,
        )
        # save in dict-lists
        stat_arr[MOV_SEL]['coef'].append(coeff)
        stat_arr[MOV_SEL]['pval'].append(pval)

        

In [None]:
# plot boxplots per feature
# uses X, y, mov_dep_code and stat_arr from the cells above

FIG_NAME = '0129env_boxplots_specPowerVar_vs_LIDcategs'


# one row per feature, one column per movement selection
fig, axes = plt.subplots(len(feat_names['STN']), 2,
                         figsize=(12, 18),
                         sharex='col', sharey='row')

for i_ft, ft in enumerate(feat_names['STN']):
    
    for i_mov, MOV_SEL in enumerate(['INDEP', 'DEPEND']):
        # get boxplot data
        mov_bool = mov_dep_code == i_mov
        # define X and groups for feat
        ft_temp = X[mov_bool, i_ft]
        y_temp = y[mov_bool]
        box_categs = [ft_temp[y_temp == cat]
                      for cat in CDRS_categs.keys()]
        
        # stats are calculated before

        # plotting
        coeff = stat_arr[MOV_SEL]['coef'][i_ft]
        pval = stat_arr[MOV_SEL]['pval'][i_ft]
        axes[i_ft, i_mov].boxplot(box_categs)
        # bold title if p is below .05 divided by the number of features
        if pval < (.05 / len(feat_names['STN'])): w = 'bold'
        else: w='normal'
        axes[i_ft, i_mov].set_title(f'{ft}, mov-{MOV_SEL}\n'
                f'(coeff {round(coeff, 2)}, '
                f'p={round(pval, 5)})',
                weight=w,)
        axes[i_ft, i_mov].set_ylim(-3, 3)

        # boxplot positions start at 1, one box per category
        axes[i_ft, i_mov].set_xticks([1,2,3,4])
        axes[i_ft, i_mov].set_xticklabels(CDRS_categs.values())
        axes[i_ft, i_mov].set_ylabel('indiv. z-scored feature\n(a.u.)')
        axes[i_ft, i_mov].set_xlabel('LID (CDRS sum)')

plt.tight_layout()

# figure is saved and closed, nothing is shown in the notebook
plt.savefig(os.path.join(utilsFiles.get_project_path('figures'),
                            'feat_dysk_corrs',
                            'corr_boxplots',
                            FIG_NAME),
            dpi=300, facecolor='w',)

plt.close()
        

In [None]:
# plot HEATMAP
# uses stat_arr (LMM coefficients and p-values) from the cells above

FIG_NAME = '0129cf_heatmap_specPowerVar_vs_LIDcategs'


fig, ax = plt.subplots(1, 1,
                       figsize=(12, 4),)
# significance threshold: .05 divided by the number of features
ALPHA = .05 / len(feat_names['STN'])
# 0 is not sign, 1 is sign; rows are the movement selections,
# columns the features, NaN where a coefficient belongs to the other map
heat_arrs = {sig_label: np.full((2, len(feat_names['STN'])), np.nan)
             for sig_label in [0, 1]}

for i_ft, ft in enumerate(feat_names['STN']):

    for i_mov, MOV_SEL in enumerate(['INDEP', 'DEPEND']):
        # fill arrays with stats calculated before
        coeff = stat_arr[MOV_SEL]['coef'][i_ft]
        pval = stat_arr[MOV_SEL]['pval'][i_ft]
        # int() works for numpy and for plain Python floats
        sig_lab = int(pval < ALPHA)
        heat_arrs[sig_lab][i_mov, i_ft] = coeff

# non-sign heatmap
vmin, vmax = -.25, .25
cmap = 'coolwarm'  # RdBu_r
nonsig_map = ax.imshow(heat_arrs[0], vmin=vmin, vmax=vmax,
                       cmap=cmap, )
# sign heatmap on top, both maps share the colour scale
sig_map = ax.imshow(X=heat_arrs[1], cmap=cmap,  # RdBu_r
                    alpha=.9, vmin=vmin, vmax=vmax,)

# write the coefficient in every significant field
for i_m, i_f in product(np.arange(heat_arrs[1].shape[0]),
                        np.arange(heat_arrs[1].shape[1])):
    if np.isnan(heat_arrs[1][i_m, i_f]): continue
    if abs(heat_arrs[1][i_m, i_f]) > .3: c='w'
    else: c = 'black'
    ax.text(i_f, i_m, s=round(heat_arrs[1][i_m, i_f], 2),
            color=c, horizontalalignment='center',
            verticalalignment='center', weight='bold',)

cbar = fig.colorbar(sig_map, pad=.01)
cbar.ax.set_ylabel('LMM coefficient (a.u.)')

ax.set_xticks(np.arange(len(feat_names['STN'])))
ax.set_xticklabels(feat_names['STN'],
                   rotation=75,)
ax.set_yticks([0, 1])
ax.set_yticklabels(['move-INDEPENDENT', 'move-DEPENDENT'],)

plt.tight_layout()

# figure is saved and closed, nothing is shown in the notebook
plt.savefig(os.path.join(utilsFiles.get_project_path('figures'),
                            'feat_dysk_corrs',
                            'corr_boxplots',
                            FIG_NAME),
            dpi=300, facecolor='w',)

plt.close()
        

In [None]:
# quick check of the heatmap array shape: (2 movement selections, n features)
heat_arrs[1].shape