## ReTap - UPDRS-Tapping Assessment - Feature Exploration

This notebook helps to visualise and statistically test the created tapping features.



### 0. Loading packages and functions, defining paths



In [1]:
# Importing Python and external packages
import os
import sys
import importlib
import pandas as pd
import numpy as np
import sklearn as sk
import scipy
import matplotlib.pyplot as plt
# import matplotlib.gridspec as gridspec



In [3]:
# Report interpreter and package versions for documentation and reproducibility
version_report = (
    ('Python sys', sys.version),
    ('pandas', pd.__version__),
    ('numpy', np.__version__),
    # ('mne_bids', mne_bids.__version__),
    # ('mne', mne.__version__),
    ('sci-py', scipy.__version__),
    ('sci-kit learn', sk.__version__),
)
for pkg_label, pkg_version in version_report:
    print(pkg_label, pkg_version)



## developed with:
# Python sys 3.9.7 (default, Sep 16 2021, 08:50:36) 
# [Clang 10.0.0 ]
# pandas 1.3.4
# numpy 1.20.3
# mne_bids 0.9
# mne 0.24.1
# sci-py 1.7.1
# sci-kit learn 1.0.1

## Currently (own env) since 31.08.22
# Python sys 3.9.12 (main, Jun  1 2022, 06:36:29) 
# [Clang 12.0.0 ]
# pandas 1.4.3
# numpy 1.21.5
# sci-py 1.7.3
# sci-kit learn 1.1.1

Python sys 3.9.13 (main, Oct 13 2022, 21:23:06) [MSC v.1916 64 bit (AMD64)]
pandas 1.4.4
numpy 1.23.3
sci-py 1.9.1
sci-kit learn 1.1.2


In [78]:
# ft extraction
import tap_extract_fts.tapping_postFeatExtr_calc as ft_calc 

# own helper functions
import retap_utils.utils_dataManagement as utils_dataMn
# from tap_extract_fts import main_featExtractionClass as mainFtExtr

### Load or create tapping-traces

In [5]:
### IMPORT CREATED CLASSES FROM FILES
# NOTE(review): presumably these classes have to be importable for the
# pickled feature set to load -- confirm
from tap_extract_fts.main_featExtractionClass import FeatureSet, singleTrace

# <local project dir>/data/derivatives holds the stored feature sets
proj_dir = utils_dataMn.get_local_proj_dir()
deriv_path = os.path.join(proj_dir, 'data', 'derivatives')

# Feature set used throughout this notebook.
# Earlier pickles that were loaded here during development:
#   'ftClass_bertest.P', 'ftClass_DUS.P',
#   'ftClass_ALL_20221214.P' (as ftClass),
#   'ftClass_ALL_max10_20221214.P' (as ftClass10)
fts_pickle_path = os.path.join(deriv_path, 'ftClass_ALL_20230301.P')
fts = utils_dataMn.load_class_pickle(fts_pickle_path)

### 2a. Visualise detected Features per Tap (FIG3)

In [6]:
# Example subject with two of its trace names, plus four feature names
sub1 = 'BER023'

# t1 = 'BER023_M0S0_R_3'   # only 5 taps'
t1 = '_'.join([sub1, 'M1S0', 'L', '1'])
t2 = '_'.join([sub1, 'M1S1', 'R', '3'])

ft1, ft2, ft3, ft4 = (
    'impactRMS',
    'raise_velocity',
    'intraTapInt',
    'tap_entropy',
)

In [80]:
# FIG3: course of selected tap features over the detected taps.
# One column per subject in `subs`, one row per feature in `ft_sel`;
# single traces and per-score means are coloured by the trace's tap score.

# Reload so edits to the feature-calculation module are picked up
importlib.reload(ft_calc)

fig_fname = 'FIG3_featCourse_2Cases'

PLOT_SINGLE_TRACES = True
PLOT_MEANS = True
TO_SAVE = False

subs = [
    'BER023',
    'BER024',
    # 'DUS022',
    # 'DUS007'
]
ft_sel = ['impactRMS', 'raise_velocity', 'intraTapInt', 'tap_entropy']
ft_names = ['impact-RMS', 'raising-velocity','inter-tap-interval',  'tap-entropy']
ft_units = ['g', 'm/s/s', 's',  'a.u.']

score_colors = {0: 'olivedrab', 1: 'forestgreen',  #lawngreen
                2: 'blue', 3: 'purple'}
fsize = 16

# squeeze=False keeps `axes` two-dimensional, so axes[row, col] indexing
# also works when only one subject (or one feature) is selected
fig, axes = plt.subplots(len(ft_sel), len(subs),
                         sharex='col',
                        #  sharey='row',
                         squeeze=False,
                         figsize=(12, 12))

for i_s, sub in enumerate(subs):
    # columns are titled 'Case A', 'Case B', ... for any number of subjects
    title = 'Case ' + chr(ord('A') + i_s)
    axes[0, i_s].set_title(title, fontsize=fsize+4, weight='bold',)

    for i_f, f in enumerate(ft_sel):

        # the feature name/unit label is only shown on the first column
        if i_s == 0:
            axes[i_f, 0].set_ylabel(r"$\bf{" + str(ft_names[i_f])
                                    + "}$" + f'\n[{ft_units[i_f]}]',
                                    fontsize=fsize,)

        # gather score values per sub, per feat
        if PLOT_MEANS:
            score_lists = {score: [] for score in score_colors}

        for t in fts.incl_traces:

            if not t.startswith(sub): continue
            trace_obj = getattr(fts, t)
            score = trace_obj.tap_score
            # score 4 has no entry in score_colors and is not plotted
            if score == 4: continue
            col = score_colors[score]

            values = getattr(trace_obj.fts, f)

            if PLOT_SINGLE_TRACES:
                axes[i_f, i_s].plot(values, color=col, alpha=.2,)

            if PLOT_MEANS:
                score_lists[score].append(list(values))

        if PLOT_MEANS:
            mean_dict, err_dict = ft_calc.get_means_std_errs(score_lists)
            for score in mean_dict.keys():
                axes[i_f, i_s].plot(mean_dict[score],
                                    color=score_colors[score],
                                    alpha=.8, lw=3,)
                axes[i_f, i_s].fill_between(x=np.arange(len(mean_dict[score])),
                                            y1=mean_dict[score] - err_dict[score],
                                            y2=mean_dict[score] + err_dict[score],
                                            color=score_colors[score], alpha=.3,)

        axes[i_f, i_s].tick_params(axis='both', labelsize=fsize,
                                    size=fsize)
        axes[i_f, i_s].spines[['right', 'top']].set_visible(False)

    axes[-1, i_s].set_xlabel('Detected taps (observations)', fontsize=fsize,)

# create legend labels with dummy plots (empty lines that only carry a
# label); done once after the loop, on the last axis, so it does not depend
# on the number of subjects and `lgd` always exists for savefig below
for score, score_col in score_colors.items():
    axes[-1, -1].plot([], c=score_col, label=f"{score}'s",
                      lw=5, )

lgd = fig.legend(ncol=4, fontsize=fsize+4, frameon=False,
                 bbox_to_anchor=(.5, -.05), loc='lower center')


plt.tight_layout()

if TO_SAVE:
    # the legend sits outside the axes; pass it as extra artist so the
    # tight bounding box includes it
    plt.savefig(os.path.join(utils_dataMn.find_onedrive_path('figures'),
                             'feature_course (fig3)', fig_fname),
                bbox_extra_artists=(lgd,), bbox_inches='tight',
                dpi=450, facecolor='w',)
plt.close()

In [None]:
import tap_plotting.retap_check_taps as plot_taps

In [None]:
# ### PLOT DETECTED TAPS for all Traces and save figures
# importlib.reload(plot_taps)
# plot_taps.plot_detected_taps(ftClass)

### 2b. Visualise Features as Group with Boxplots

In [None]:
import tap_plotting.retap_visualise_fts as retapViz

In [None]:
### GET GLOBAL SUMMARY
# 
# 21 blocks false positive, missed 1
# blocks without taps

# Uses `fts`, the feature set loaded at the top of this notebook
# (the previously referenced `ftClass` is no longer created anywhere).

# Divisor that converts the number of samples into seconds.
# NOTE(review): presumably the accelerometer sampling rate in Hz -- confirm
ACC_SFREQ = 250

block_durations = []
ntaps = []
subs = []
scores = []

for trace in fts.incl_traces:
    trace_obj = getattr(fts, trace)
    subs.append(trace_obj.sub)
    scores.append(trace_obj.tap_score)

    # look up both values before storing either one, so that `ntaps` and
    # `block_durations` always describe the same set of traces
    try:
        trace_ntaps = trace_obj.fts.total_nTaps
        trace_duration = trace_obj.acc_sig.shape[1] / ACC_SFREQ
    except AttributeError:
        # report traces that lack one of the attributes and skip them
        print(trace)
        continue

    ntaps.append(trace_ntaps)
    block_durations.append(trace_duration)

print()
print(f'UPDRS subscores: {np.unique(scores, return_counts=True)}')
print(f'In {len(np.unique(subs))} unique subjects, {len(fts.incl_traces)} tap blocks found')
print(f'block durations: {np.mean(block_durations)} +/- {np.std(block_durations)}')
print(f'n taps: {np.mean(ntaps)} +/- {np.std(ntaps)}')

In [None]:
# Boxplots of the features, grouped per tap score, for the full cohort.
importlib.reload(retapViz)

# NOTE: this selection is not passed on; `fts_include` is replaced by the
# second return value of sort_fts_on_tapScore() directly below
fts_include = [
    'mean_tapRMSnrm', 'coefVar_tapRMSnrm', 'decr_tapRMSnrm', 'slope_tapRMSnrm',
    'mean_intraTapInt', 'coefVar_intraTapInt', 'decr_intraTapInt', 'slope_intraTapInt',
    'mean_jerkiness', 'coefVar_jerkiness', 'decr_jerkiness', 'slope_jerkiness'
]

# `fts` is the feature set loaded at the top of this notebook (the
# previously referenced `ftClass` is no longer created anywhere)
tempScoreDict, fts_include = retapViz.sort_fts_on_tapScore(
    ftClass=fts,
)

# NOTE(review): `merge_meth` is not defined in the visible part of this
# notebook; it has to be set before this cell can run -- confirm its value
retapViz.plot_boxplot_feats_per_subscore(
    fts_include=fts_include,
    sorted_feat_dict=tempScoreDict,
    plot_title='ReTap DUS and BER',# and DUS',
    figsave_name=f'retap_fullCohort_{merge_meth}_v2',
    figsave_dir=os.path.join(
        # data-management helpers are imported as `utils_dataMn`
        utils_dataMn.find_onedrive_path('figures'),
        'fts_boxplots',
    ),
    show=False,
)


### 2c. Visualise Slow vs Fast Tappers

In [None]:
import datetime as dt

# Base path for figures, used as save directory by the cells below
fig_path_1D = utils_dataMn.find_onedrive_path(
    'figures'
)

In [None]:
# Collect one value of the selected feature per trace, grouped by tap
# score (0-4), as input for the boxplot in the next cell.
# NOTE(review): `ftClass10` is not created in the visible part of this
# notebook (its load is commented out in the loading cell) -- it has to be
# loaded before this cell can run.
ft_select = 'freq'   #  intraTapInt, freq, total_nTaps, tapRMSnrm
aggr_meth = 'mean'  # mean, std dev, coef of var
y_label = ft_select

traces_excl = [
    'DUS006_M0S0_L_1',
]

totalFreq = []
totalTaps = []
scores, values = [], []

# one list of values per tap score
box_lists = {i: [] for i in range(5)}

for trace_name in ftClass10.incl_traces:

    if trace_name in traces_excl: continue

    trace = getattr(ftClass10, trace_name)
    value = getattr(trace.fts, ft_select)

    # features stored as one value per tap are reduced to a single number;
    # all three aggregations ignore NaNs
    if isinstance(value, np.ndarray):
        if aggr_meth == 'mean':
            value = np.nanmean(value)
        elif aggr_meth == 'std dev':
            value = np.nanstd(value)
        elif aggr_meth == 'coef of var':
            # coefficient of variation = std / mean (population std)
            value = np.nanstd(value) / np.nanmean(value)

        y_label = f'{aggr_meth} {ft_select}'
    values.append(value)

    score = getattr(trace, 'tap_score')
    scores.append(score)

    box_lists[score].append(value)

# convert to a list of lists, ordered by score 0..4
box_lists = [box_lists[i] for i in range(5)]


In [None]:
# Boxplot of the collected feature values, one box per UPDRS tap score
fs = 14  # fontsize

box_lists = retapViz.clean_list_of_lists(box_lists)

plt.figure(figsize=(8, 6))
plt.boxplot(box_lists)

# axis labelling: boxes sit at positions 1..5 and are labelled 0..4
plt.xlabel('UPDRS tap score', size=fs)
plt.ylabel(y_label, size=fs)
plt.xticks(range(1, 6), labels=range(5))
plt.tick_params(axis='both', size=fs, labelsize=fs)

# plt.ylim(0, 1.5)

# date-stamped figure name (YYYYMMDD prefix)
today = dt.date.today()
dd = str(today.day).zfill(2)
mm = str(today.month).zfill(2)
figname = f'{today.year}{mm}{dd}' + f'_ftBoxplot_10taps_{y_label}'

# plt.savefig(
#     os.path.join(fig_path_1D, 'fts_boxplots', figname),
#     dpi=150, facecolor='w',)

plt.show()

#### PLOT CLUSTERS OF FAST / SLOW TAPPERS

In [None]:
import tap_plotting.retap_plot_clusters as plot_cluster
import tap_predict.tap_pred_prepare as pred_prep
import retap_utils.get_datasplit as get_split


In [None]:
# Build the feature matrix X and score vector y used for clustering.
# NOTE(review): `ftClass10` is not created in the visible part of this
# notebook (its load is commented out in the loading cell) -- it has to be
# loaded before this cell can run.
importlib.reload(pred_prep)
importlib.reload(get_split)

# create matrix to cluster with
traces_excl = [
    'DUS006_M0S0_L_1',
]

ft_sel = [
    'mean_intraTapInt',
    'coefVar_intraTapInt',
    'freq'
]

# NOTE(review): the subjects stored under 'dev' are passed as `excl_subs`
# below and the figure names further down contain 'holdOutSet', so this
# appears to select the hold-out subjects (the earlier comment here stated
# the opposite) -- confirm which split is intended
datasplit_subs = get_split.find_dev_holdout_split(feats=ftClass10, )

X, y = pred_prep.create_X_y_vectors(
    ftClass=ftClass10,
    incl_feats=ft_sel,
    # take the trace names from the same feature set the values come from
    incl_traces=ftClass10.incl_traces,
    excl_traces=traces_excl,
    excl_subs=datasplit_subs['dev'],
    to_zscore=True,
)


## MASKING BCS TOO LOW NUMBERS
# UPDRS 4 -> 3 merge (4: n=3)
mask = y == 4
y[mask] = 3

# # UPDRS 0 -> 1 merge (0: n=40)
# mask = y == 0
# y[mask] = 1

In [None]:
# Plot the k-means clustering of X and fetch the cluster label per trace
importlib.reload(plot_cluster)

n_clusters = 2

# date-stamped figure name (YYYYMMDD prefix)
today = dt.date.today()
dd = str(today.day).zfill(2)
mm = str(today.month).zfill(2)
cluster_figname = (
    f'{today.year}{mm}{dd}_holdOutSet'
    + f'_{n_clusters}clusters_10taps_M_CV_iti_freq'   # freq
)
cluster_figdir = os.path.join(fig_path_1D, 'clustering')

plot_cluster.plot_cluster_kMeans(
    X=X,
    y=y,
    n_clusters=n_clusters,
    use_pca=True,
    z_score=True,
    alt_labels=['mean ITI', 'cv ITI'],
    show=False,
    figsave_dir=cluster_figdir,
    figsave_name=cluster_figname,
)

# same clustering settings as the plot above; only labels and centres are kept
y_clust, centr_clust, _ = plot_cluster.get_kMeans_clusters(
    X=X, n_clusters=n_clusters, use_pca=True, z_score=True,
)

In [None]:
# plot distribution of tap-sub-scores in clusters
# (one histogram per cluster, drawn as side-by-side bars per score)

fig, ax = plt.subplots(1, 1, figsize=(12, 8))

fs=16

# bar width/alignment per cluster, so bars of different clusters do not
# overlap; only defined for two or three clusters
if n_clusters == 3:
    widths = [.3, -.3, .3]
    aligns = ['left', 'mid', 'mid']
elif n_clusters == 2:
    widths = [-.3, .3]
    aligns = ['mid', 'mid']
else:
    raise ValueError(
        f'bar layout only defined for 2 or 3 clusters, got {n_clusters}'
    )

for n_cl in range(n_clusters):

    # scores of the traces assigned to this cluster
    clust_y = y[y_clust == n_cl] 
    plt.hist(
        clust_y,
        bins=np.arange(5),
        width=widths[n_cl],
        align=aligns[n_cl],
        label=f'Cluster {n_cl + 1}\n(n={len(clust_y)})')

plt.ylabel('n Observations (10-s traces)', size=fs,)
plt.xlim(-.5, 4)

plt.xticks(np.arange(4), labels=['0', '1', '2', '3 + 4'])
plt.xlabel('UPDRS sub tap score', size=fs,)

plt.legend(fontsize=fs, frameon=False,)

plt.tick_params(axis='both', size=fs, labelsize=fs)

for side in ['top', 'right']: ax.spines[side].set_color('none')

# date-stamped figure name (YYYYMMDD prefix)
dd = str(dt.date.today().day).zfill(2)
mm = str(dt.date.today().month).zfill(2)
figname = (
    f'{dt.date.today().year}{mm}{dd}'
    f'_{n_clusters}clusters_10taps_holdOutSet'
    'M_CV_iti_freq_scores'  # freq
)

# apply the layout BEFORE saving, otherwise the saved file does not
# contain the tightened layout
plt.tight_layout()

plt.savefig(
    os.path.join(fig_path_1D, 'clustering', figname),
    dpi=150, facecolor='w',)

plt.close()

### 3. Visualise single Features over Tap-course


TODO: this section still has to be adapted to the current feature-class objects.