In [1]:
# load packages
import itertools
from itertools import compress
import pdb

import time
from datetime import timedelta, datetime
from os.path import join
import os
from os import listdir
from os.path import isfile, join
import csv
from timeit import default_timer as timer

import numpy as np
import numpy.matlib
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import figure
from matplotlib.ticker import PercentFormatter
%matplotlib inline


import statsmodels.api as sm
from statsmodels.formula.api import ols
import scipy
from scipy.signal import welch, decimate, periodogram, find_peaks
from scipy import signal
from scipy import stats
from scipy.stats import pearsonr, mannwhitneyu, spearmanr, ranksums, ttest_ind, f_oneway
from scipy.ndimage.filters import uniform_filter1d
from statsmodels.stats.anova import AnovaRM
from statsmodels.stats.multitest import multipletests
from statsmodels.multivariate.manova import MANOVA
from statistics import mode
import math  
import random 
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler , MinMaxScaler
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn import tree
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import LeaveOneGroupOut, cross_val_predict, cross_val_score, KFold, train_test_split, StratifiedKFold, GridSearchCV 
from sklearn.model_selection import permutation_test_score
from sklearn.metrics import roc_auc_score, roc_curve, classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score, auc, f1_score, precision_recall_curve
from sklearn.manifold import MDS
import seaborn as sns

import sys

In [2]:
# Report the versions of the core numerical packages used in this notebook.
# Versions at the time of the original analysis:
# Pandas 1.1.3, Numpy 1.19.1, SciPy 1.5.2
for pkg_label, pkg in (('Pandas:', pd), ('Numpy:', np), ('SciPy:', scipy)):
    print(pkg_label, pkg.__version__)

Pandas: 1.1.3
Numpy: 1.19.1
SciPy: 1.5.2


In [3]:
# Define `path`: the project root (parent folder of the notebook directory),
# which holds the 'data', 'results' and 'figures' folders used below.
# The notebook directory is captured only once per kernel session: this cell
# changes the working directory, so re-reading os.getcwd() on a re-run would
# move `path` one folder further up on every execution.
if 'notebook_path' not in globals():
    notebook_path = os.getcwd()
path = os.path.dirname(notebook_path)

os.chdir(path)
print('Path and working directory defined as %s' %path)


Path and working directory defined as /Users/jeroenhabets/Research/pd_sensors/brady_reallife


# Real-life ON- vs OFF-bradykinesia detection using a wrist-accelerometer  - Patient population description and classification results (incl. significance and clinical correlations)

Prep: Loading in extracted accelerometer bradykinesia features (one value/ 60 seconds, medState 0 is pre-medication, 1 is post-medication), and outcomes from classification analyses.

Part 1: Descriptive statistics of population.

Part 2: Analyzing population for differences in medication-states based on 4 main features, using MANOVA + post-hoc ANOVA analyses.

Part 3a: Comparing classification results with barplots (varying models and data inclusion approaches).

Part 3b: Comparing classification results with equality plots.

Part 4: Correlating predictive performance with clinical characteristics.


## Preparation: Load clinical info, features, and classification results
Previously filtered data is loaded into dataframes per patient-side.

In [223]:
def loadUPDRSscores(minBradyDiff=0.5, excludePts=('022', '080')):
    '''
    Load the clinical UPDRS-III table and derive sub-scores per patient.

    Reads 'updrsScores.csv' from the 'data' folder inside the global `path`.

    Input:
    - minBradyDiff: patients whose hand-bradykinesia diff-score on the
      included side is below this value are removed (default 0.5)
    - excludePts: patient ids that are always removed; by default 022 and 080,
      which do not have enough data for holdout. Ids that are not (or no
      longer) in the table are ignored.

    Returns:
    - updrsScores: dataframe with clinical scores of all included patients,
      indexed by PtId. Per sub-score three columns are added
      ('<name>_diff', '<name>_%diff', '<name>_off'), plus 'Side' and the
      'inclSideHand<Tremor|Brady><_off|_diff|_%diff>' columns.

    PM: Differences are calculated OFF minus ON, which means clinical IMPROVEMENT (decrease in UPDRS in ON
    compared to OFF) results in POSITIVE diff-scores. (A negative diff-score means a clinical worsening
    with a higher UPDRS-score in ON compared to OFF)
    '''
    # Read in updrs file
    updrsScores = pd.read_csv(os.path.join(path,'data','updrsScores.csv'))
    # PtId is the 'Record Id' string without its first three characters
    # (kept as string, so zeros in front of the id are preserved)
    updrsScores['PtId'] = updrsScores['Record Id'].str[3:]
    updrsScores = updrsScores.drop(columns=['Record Id']).set_index('PtId')

    # UPDRS-III items per sub-score; the OFF and ON columns share the item
    # code and only differ in their 'OFF_UPDRS_' / 'ON_UPDRS_' prefix
    subscore_items = {
        'leftHandBrady': ['3_3c','3_4b','3_5b','3_6b'],
        'rightHandBrady': ['3_3b','3_4a','3_5a','3_6a'],
        'bradyBody': ['3_14'],
        'leftHandTremor': ['3_15b','3_16b','3_17b'],
        'rightHandTremor': ['3_15a','3_16a','3_17a'],
        'legs': ['3_7a','3_7b','3_8a','3_8b'],
        'gait': ['3_9','3_10','3_11'],
        'posture': ['3_12','3_13'],
        'facial': ['3_1','3_2']}

    for l, (name, items) in enumerate(subscore_items.items()):
        offSum = updrsScores[['OFF_UPDRS_' + item for item in items]].sum(axis=1)
        onSum = updrsScores[['ON_UPDRS_' + item for item in items]].sum(axis=1)
        diffScore = offSum - onSum # difference between off and on scores
        # normalize sum-difference by the potential total score (4 points per item)
        relDiffScore = diffScore / (len(items)*4)
        # insert positions are kept as before to preserve the column order
        updrsScores.insert(loc=l*2, value=diffScore.values, column=name + '_diff')
        updrsScores.insert(loc=l*2 +1, value=relDiffScore.values, column=name + '_%diff')
        updrsScores.insert(loc=l*2 +2, value=offSum.values, column=name + '_off')

    # indicate which side had more arm-bradykinesia fluctuation
    # (ties and missing values fall to 'Right')
    leftMore = (updrsScores['leftHandBrady_diff'] > updrsScores['rightHandBrady_diff']).values
    updrsScores.insert(loc=0, value=np.where(leftMore, 'Left', 'Right'), column='Side')
    # create uniform columns for bradykinesia and tremor on included side for analysis
    for symp in ['Tremor','Brady']:
        for var in ['_off','_diff','_%diff']:
            updrsScores['inclSideHand%s%s' % (symp,var)] = np.where(
                leftMore,
                updrsScores['leftHand%s%s' % (symp,var)].values,
                updrsScores['rightHand%s%s' % (symp,var)].values).astype(float)

    # selecting patients with handBrady improvement on vs off ...
    tooSmallDiff = updrsScores['inclSideHandBrady_diff'] < minBradyDiff
    updrsScores = updrsScores[~tooSmallDiff]
    # ... and enough data; errors='ignore' because an excluded patient may
    # already have been removed by the improvement filter above
    updrsScores = updrsScores.drop(labels=list(excludePts), axis=0, errors='ignore')

    return updrsScores

In [5]:
def loadFeatures(minBradyDiff, updrsScores, windowLen=60):
    '''
    Read the extracted feature table of every eligible patient.

    Input:
    - minBradyDiff: a patient is eligible when the left- or the right-hand
      brady diff-score is strictly larger than this value
    - updrsScores: dataframe with clinical scores, indexed by patient id, with
      columns 'leftHandBrady_diff' and 'rightHandBrady_diff'
    - windowLen: window length in seconds, used in the feature file name

    Returns:
    - features: dictionary with patient id as key and, as value, the dataframe
      read from '<pt>_<windowLen>sec_features.csv' in 'data/features_oct20'
      inside the global `path`.
    '''
    # 022 and 080 are never loaded: their data is not large enough for holdout
    tooLittleData = ('022','080')
    sideCols = ('leftHandBrady_diff', 'rightHandBrady_diff')

    # patients with enough clinical difference on at least one side
    selectedIDs = [
        pt for pt in updrsScores.index
        if pt not in tooLittleData
        and any(updrsScores.loc[pt][col] > minBradyDiff for col in sideCols)
    ]

    # one feature dataframe per selected patient
    return {
        pt: pd.read_csv(os.path.join(
            path,'data','features_oct20','%s_%isec_features.csv' % (pt,windowLen)))
        for pt in selectedIDs
    }
        

In [224]:
# Clinical scores are loaded first; the feature loader uses them to select patients
updrsScores = loadUPDRSscores()
features = loadFeatures(
    minBradyDiff=0.5,
    updrsScores=updrsScores,
    windowLen=60,
)

In [121]:
def _load_result_table(file, model):
    '''Read one result csv from 'results'; returns a table with a 'pred' row and an empty 'p' row.'''
    dat = pd.read_csv(os.path.join(path,'results',file), index_col=0)
    if model == 'indiv': # indiv models need mean,sd calculation over the splits (rows)
        summary = {}
        for col in dat.keys():
            summary[col] = np.mean(dat[col])
            summary[col+'_sd'] = np.std(dat[col]) # np.std default: ddof=0
        dat = pd.DataFrame(summary, index=['pred'])
    # add empty (NaN) row for p; reindex replaces DataFrame.append, which
    # no longer exists in pandas >= 2.0
    return dat.reindex(list(dat.index) + ['p'])


def create_result_dicts():
    '''
    Creates all different outcome dictionaries.

    Looks in the 'results' folder (inside the global `path`) for one file per
    combination of model ('indiv'/'group'), classifier ('SV'/'RF'), feature
    set ('allfts'/'4fts') and activity filtering ('ACTF' in the file name or
    not). Combinations without exactly one matching file are reported and
    skipped.

    Returns:
    - dictout: dictionary with keys '<model>_<cls>_<fts>[_actfilter]'; every
      value is a dataframe with a row 'pred' (for 'indiv' models: column means
      plus '<col>_sd' columns) and a row 'p'. Row 'p' is filled for models
      that have a '<key>_5000perms.csv' file in 'results/perms', and is NaN
      otherwise.
    '''
    dictout = {}
    resultsfiles = listdir(os.path.join(path,'results'))
    for model in ['indiv','group']:
        for cls in ['SV','RF']:
            for fts in ['allfts','4fts']:
                for act_filter in [True,False]:
                    matches = [
                        name for name in resultsfiles
                        if model in name and cls in name and fts in name
                        and ('ACTF' in name) == act_filter]
                    if len(matches) != 1:
                        print('No or multiple files for combination:%s, %s, %s, %s' %(model,cls,fts,act_filter))
                        continue
                    key = model+'_'+cls+'_'+fts
                    if act_filter:
                        key += '_actfilter'
                    dictout[key] = _load_result_table(matches[0], model)

    ### ADD SIGNIFIANCE TESTING WHEN PERMUTATIONS ARE READY!
    perm_dir = os.path.join(path,'results','perms') # perm folder
    permfilelist = listdir(perm_dir)
    for model, table in dictout.items():
        perm_name = model+'_5000perms.csv'
        if perm_name not in permfilelist: # go further for models with perm-file
            continue
        prm = pd.read_csv(os.path.join(perm_dir, perm_name), index_col=0)
        for col in table.keys():
            if col[-2:] == 'sd': # exclude sd columns
                continue
            v = table.loc['pred', col] # determine value of pt+metric
            # p = part of the permutation scores strictly higher than v
            # NOTE(review): without a '+1' correction p can become exactly 0;
            # confirm that this definition is intended.
            p = (prm[col] > v).sum() / len(prm[col])
            table.at['p',col] = p # add p value to outcome dataframe

    return dictout


In [333]:
dictout = create_result_dicts()
keys = list(dictout.keys())
# Sanity check: show the shape of every result table
for key, table in zip(keys, dictout.values()):
    print(key, table.shape)

indiv_SV_allfts_actfilter (2, 240)
indiv_SV_allfts (2, 240)
indiv_SV_4fts_actfilter (2, 240)
indiv_SV_4fts (2, 240)
indiv_RF_allfts_actfilter (2, 240)
indiv_RF_allfts (2, 240)
indiv_RF_4fts_actfilter (2, 240)
indiv_RF_4fts (2, 240)
group_SV_allfts_actfilter (2, 120)
group_SV_allfts (2, 120)
group_SV_4fts_actfilter (2, 120)
group_SV_4fts (2, 120)
group_RF_allfts_actfilter (2, 120)
group_RF_allfts (2, 120)
group_RF_4fts_actfilter (2, 120)
group_RF_4fts (2, 120)


## Part 1: Descriptives of patient population

In [13]:
# Clinical variables to describe (total scores and sub-score differences)
clin_vars = [
    'totalUPDRS3_On', 'totalUPDRS3_Off', 'totalUPDRS3_Diff',
    'inclSideHandBrady_off', 'inclSideHandTremor_off',
    'inclSideHandBrady_diff', 'inclSideHandTremor_diff',
    'bradyBody_diff', 'legs_diff', 'gait_diff', 'posture_diff', 'facial_diff',
]
n_patients = updrsScores.shape[0]
descriptives_file = 'results/updrs_descriptives_n%i.txt' % n_patients

print('Number of included patients: %i' % n_patients)
print()
# Mean/sd go to screen and to the text file; min/max are only shown on screen
with open(descriptives_file, 'w') as f:
    print('UPDRS Descriptives\n\n', file=f)
    for var in clin_vars:
        scores = updrsScores[var]
        var_mean, var_sd = np.mean(scores), np.std(scores)
        print(var)
        print('Mean: %.2f (sd: %.2f)' % (var_mean, var_sd))
        print('Min: %.2f, max: %.2f' % (np.min(scores), np.max(scores)))
        print()
        print('%s:\nMean: %.2f (sd: %.2f)\n' % (var, var_mean, var_sd), file=f)
        

Number of included patients: 20

totalUPDRS3_On
Mean: 27.10 (sd: 9.60)
Min: 11.00, max: 42.00

totalUPDRS3_Off
Mean: 43.75 (sd: 11.63)
Min: 29.00, max: 67.00

totalUPDRS3_Diff
Mean: 16.65 (sd: 8.57)
Min: -4.00, max: 38.00

inclSideHandBrady_off
Mean: 8.85 (sd: 2.26)
Min: 6.00, max: 13.00

inclSideHandTremor_off
Mean: 3.85 (sd: 2.67)
Min: 0.00, max: 9.00

inclSideHandBrady_diff
Mean: 3.90 (sd: 1.95)
Min: 1.00, max: 7.00

inclSideHandTremor_diff
Mean: 2.10 (sd: 2.32)
Min: 0.00, max: 9.00

bradyBody_diff
Mean: 0.85 (sd: 0.57)
Min: 0.00, max: 2.00

legs_diff
Mean: 0.85 (sd: 3.09)
Min: -6.00, max: 6.00

gait_diff
Mean: 0.95 (sd: 0.97)
Min: 0.00, max: 3.00

posture_diff
Mean: 0.45 (sd: 1.12)
Min: -1.00, max: 3.00

facial_diff
Mean: 1.15 (sd: 0.91)
Min: -1.00, max: 3.00



## Part 2: Differences in 4 candidate movement features between pre- and post-med states

Analyze variance in features between pre and post-medication features.

Standardised features will be used, just as in predictive analysis. Features are standardised individually, only pre-medication features are used as reference for the standardisation function.

4 main features were pragmatically chosen based on their importance in literature. Variance analysis will be conducted with a multivariate Analysis Of Variance (MANOVA), and 4 separate repeated measures ANOVAs as post-hoc tests.


### Standardisation, normalisation of features

In [14]:
def zScoreFeatures(features, scaling):
    '''
    Balance, scale and pool the features of all patients.

    Per patient an equal number of pre- and post-medication rows is kept
    (the first rows of each state, as many as the smaller state has). The
    scaler is fitted per patient, on the pre-medication rows only, and then
    applied to all rows of that patient.

    Input:
    - features: dict with a feature dataframe for every patient separately.
      Includes all on and off features per minute, plus a 'medState' column
      (0 = pre-medication, 1 = post-medication). The feature columns are
      taken from the first patient in the dict.
    - scaling: 'Stand' (StandardScaler), 'Norm' (MinMaxScaler) or
      'None' (no scaling)

    Returns:
    - z_total: pooled rows of all patients, feature columns + 'medState' + 'pt'
    - z_pre, z_post: pooled scaled rows per medication state
      (empty when scaling is 'None')
    - zFeats: dict with the scaled dataframe per patient
      (empty when scaling is 'None')

    Raises:
    - ValueError when scaling is not one of the three supported options.
    '''
    if scaling not in ('Stand', 'Norm', 'None'):
        raise ValueError("scaling should be 'Stand', 'Norm' or 'None', got %r" % (scaling,))

    zFeats = {}
    firstPt = list(features.keys())[0]
    allFts = [f for f in features[firstPt].keys().tolist() if f != 'medState']

    # collect the frames per patient and concatenate once at the end
    total_parts, pre_parts, post_parts = [], [], []
    list_pt = []

    for pt, ptFeats in features.items():
        preRows = ptFeats[ptFeats['medState']==0]
        postRows = ptFeats[ptFeats['medState']==1]
        # minSize is shortest length of samples(rows) per pt, on or off samples
        minSize = min(len(preRows), len(postRows))
        # create predData per pt, with balanced rows pre and post med (minSize)
        predData = pd.concat([preRows[:minSize], postRows[:minSize]],
                             axis=0, sort=False).reset_index(drop=True)
        # collect pt-numbers
        list_pt.extend([pt]*minSize*2)

        if scaling == 'None':
            total_parts.append(predData)
            continue

        # standarize data or normalize data
        if scaling == 'Stand':
            scaler = StandardScaler()
        else:
            scaler = MinMaxScaler()
        scaler.fit(predData[predData['medState']==0][allFts]) # fitted only on pre-medication data

        # scaled features (x) and medication state (y) back in one dataframe
        x = pd.DataFrame(data = scaler.transform(predData[allFts]), columns = allFts)
        x['medState'] = predData['medState']
        zFeats[pt] = x
        total_parts.append(x)
        pre_parts.append(x[x['medState']==0])
        post_parts.append(x[x['medState']==1])

    def _pool(parts):
        '''Concatenate frames row-wise with the feature columns in front.'''
        if not parts:
            return pd.DataFrame(columns = allFts)
        pooled = pd.concat(parts, axis=0, sort=False, ignore_index=True)
        ordered = allFts + [c for c in pooled.columns if c not in allFts]
        return pooled[ordered]

    z_total, z_pre, z_post = _pool(total_parts), _pool(pre_parts), _pool(post_parts)
    z_total['pt'] = list_pt
    print(z_total.shape, z_pre.shape, z_post.shape)

    return z_total, z_pre, z_post, zFeats
    

In [15]:
# The four candidate features of Part 2 (read as a global by the analysis functions below)
mainFts = ['SVM_maxAcc','SVM_coefVar','SVM_RMS','SVM_specPow_totalu4Hz']
# Pooled feature tables: standardised ('Stand') and min-max normalised ('Norm')
z_total, z_pre, z_post, zFeatsPt = zScoreFeatures(features=features, scaling='Stand')
n_total, n_pre, n_post, nFeatsPt = zScoreFeatures(features=features, scaling='Norm')
# raw_total,_,_,_ = zScoreFeatures(features, 'None')


(2380, 105) (1190, 104) (1190, 104)
(2380, 105) (1190, 104) (1190, 104)


### MANOVA - Multivariate Analysis of Variance
To test if the 20 patients differ in pre-medication and post-medication state based on 4 main features which are averaged per patient, per medication state.
Features are standardised individually. Only pre-medication data is used to fit standardisation formula.

In [12]:
def manova(z_or_n, save):
    '''
    MANOVA on the patient means of the main features, pre- vs post-medication.

    Per patient and medication state the mean of every feature in the global
    `mainFts` is calculated; these means are tested with a MANOVA with
    medState as the only explanatory variable.

    Input:
    - z_or_n: 'std' to use the standardised data (global z_total), 'norm'
      (or 'nrm') to use the normalised data (global n_total)
    - save: Boolean, if True the MANOVA table is written to
      'results/manova_table.txt'

    Returns:
    - anova_data: dataframe in long form, one row per patient per medState,
      with columns 'pt' (patient number, in order of the global `features`),
      'medState' and the mean value of each main feature.

    Raises:
    - ValueError when z_or_n is not a supported option.

    # Literature on MANOVA:
    # https://www.marsja.se/python-manova-made-easy-using-statsmodels/
    # https://www.real-statistics.com/multivariate-statistics/multivariate-analysis-of-variance-manova/two-way-manova/
    # https://github.com/statsmodels/statsmodels/issues/6464
    # https://statistics.laerd.com/spss-tutorials/two-way-manova-using-spss-statistics.php
    '''
    if z_or_n == 'std':
        scaled = z_total # use stand. data
    elif z_or_n in ('norm', 'nrm'):
        scaled = n_total # use norm. data
    else:
        raise ValueError("z_or_n should be 'std' or 'norm', got %r" % (z_or_n,))

    ### Create (M)ANOVA table in long data form with mean values per patient
    rows = []
    for p,pt in enumerate(features.keys()):
        for med in [0,1]:
            sel = scaled[np.logical_and(scaled['pt']==pt, scaled['medState']==med)]
            row = {'pt': p, 'medState': med}
            for ft in mainFts:
                row[ft] = np.mean(sel[ft])
            rows.append(row)
    anova_data = pd.DataFrame(rows, columns=['pt','medState']+mainFts)

    # NOTE(review): exog only holds medState, no constant column is added, so
    # the model is fitted without intercept (the printed table shows a single
    # term 'x0'). Confirm this is intended, especially for the normalised
    # data, where the pre-medication means are presumably not zero.
    maov = MANOVA(endog= anova_data[mainFts], exog=anova_data['medState'])
    mv_results = maov.mv_test()
    print(mv_results)
    if save:
        with open('results/manova_table.txt', 'w') as f:
            print(mv_results, file=f)
    return anova_data

In [13]:
# MANOVA on the standardised features; table is only printed, not saved
anova_data = manova(z_or_n='std', save=False)




                 Multivariate linear model
                                                            
------------------------------------------------------------
           x0           Value  Num DF  Den DF F Value Pr > F
------------------------------------------------------------
          Wilks' lambda 0.3878 4.0000 36.0000 14.2080 0.0000
         Pillai's trace 0.6122 4.0000 36.0000 14.2080 0.0000
 Hotelling-Lawley trace 1.5787 4.0000 36.0000 14.2080 0.0000
    Roy's greatest root 1.5787 4.0000 36.0000 14.2080 0.0000



### Post-Hoc Analysis
1: Repeated Measures ANOVA to find the individual p-values of the 4 features leading to the significant difference in the MANOVA.

2: Correct the p-values for multiple comparisons with the False Discovery Rate (Benjamini-Hochberg).

In [14]:
def posthoc(anova_data, save):
    '''
    Analyse single features with repeated measures ANOVA (equal to t-test)
    to find which features cause the significant difference found in MANOVA.

    For every feature in the global `mainFts` a repeated measures ANOVA is
    fitted with 'pt' as subject and 'medState' as within-factor. The resulting
    p-values are corrected for multiple comparisons with the Benjamini-Hochberg
    False Discovery Rate (alpha = 0.05).

    Input:
    - anova_data: dataframe in long form as returned by manova()
    - save: Boolean, if True all results are also written to
      'results/posthoc_rm_anova.txt'

    Returns:
    - sign_main_fts: list with the features that are significant after
      FDR correction.
    '''
    results_file = 'results/posthoc_rm_anova.txt'
    if save:
        # to write text result in to a text-file
        with open(results_file, 'w') as f:
            print('Post-Hoc analysis after MANOVA, with repeated measures ANOVA (4 features)\n', file=f)

    p_values = []
    for ft in mainFts:
        # AnovaRM: first data, then dependent var, subject ident, within=independent var
        res = AnovaRM(anova_data, ft, 'pt', within=['medState']).fit()
        # keep the p-value of this fit, so the FDR correction always uses the
        # values of the current data (instead of hard-coded numbers)
        p_values.append(res.anova_table['Pr > F'].loc['medState'])
        if save:
            with open(results_file, 'a') as f:
                print(ft,':', file=f)
                print(res, file=f)
        print(ft,':')
        print(res)

    ## FDR corrected significancies: first return value of multipletests is
    ## the boolean array that marks the rejected (significant) hypotheses
    reject,_,_,_ = multipletests(p_values, alpha=0.05, method='fdr_bh')
    sign_main_fts = list(compress(mainFts, reject))
    print()
    print('Feature(s) with signficant difference:',sign_main_fts)
    if save:
        with open(results_file, 'a') as f:
            print('\nFeature(s) with signficant difference:',sign_main_fts, file=f)

    return sign_main_fts

In [15]:
# Post-hoc tests on the MANOVA input; results are only printed, not saved
sign_feats = posthoc(anova_data=anova_data, save=False)

SVM_maxAcc :
                Anova
         F Value Num DF  Den DF Pr > F
--------------------------------------
medState  1.4074 1.0000 19.0000 0.2501

SVM_coefVar :
                Anova
         F Value Num DF  Den DF Pr > F
--------------------------------------
medState 10.5744 1.0000 19.0000 0.0042

SVM_RMS :
                Anova
         F Value Num DF  Den DF Pr > F
--------------------------------------
medState  0.0349 1.0000 19.0000 0.8538

SVM_specPow_totalu4Hz :
                Anova
         F Value Num DF  Den DF Pr > F
--------------------------------------
medState  1.0293 1.0000 19.0000 0.3231


Feature(s) with signficant difference: ['SVM_coefVar']


## Visualization of 4 candidate features with violin plot

In [47]:
def plotViolins(zmethod, sign_feats, save):
    '''
    Split violin plot of the patient means of the main features,
    pre- versus post-medication.

    Input:
    - zmethod: 'std' to plot the standardised data (global z_total), 'nrm'
      (or 'norm') to plot the normalised data (global n_total)
    - sign_feats: list of features that get a red significance star
    - save: Boolean, if True the figure is saved as
      'figures/<zmethod>_violin_4fts_sig.png' in the global `path`

    Raises:
    - ValueError when zmethod is not a supported option.
    '''
    ls = 20 # labelsize
    ts = 24 # titlesize
    if zmethod == 'std':
        scaled = z_total # use stand. data
        y_label='Patient mean of \nstandarized feature'
    elif zmethod in ('nrm', 'norm'):
        zmethod = 'nrm' # one spelling for the checks and file name below
        scaled = n_total # use norm. data
        y_label='Patient mean of \nnormalized feature'
    else:
        raise ValueError("zmethod should be 'std' or 'nrm', got %r" % (zmethod,))
    # colors to use for violinplot
    col1,col2 = list(sns.color_palette("Set2"))[1],list(sns.color_palette("Paired"))[2]

    # mean value per patient, per medication state, per feature
    rows = []
    for p,pt in enumerate(features.keys()):
        for med in [0,1]:
            sel = scaled[np.logical_and(scaled['pt']==pt, scaled['medState']==med)]
            row = {'pt': p, 'medState': med}
            for ft in mainFts:
                row[ft] = np.mean(sel[ft])
            rows.append(row)
    anova_data = pd.DataFrame(rows, columns=['pt','medState']+mainFts)

    # long form for seaborn: one row per patient, state and feature
    violin_data = pd.melt(anova_data, id_vars=['medState'],
            value_vars=mainFts)
    violin_data['medState'] = violin_data['medState'].map(
        {0: 'Pre-med (OFF)', 1: 'Post-med (ON)'})
    violin_data = violin_data.rename(columns={'variable':'Features',
        'medState':'Medication Status','value':y_label})

    # plot violin; figure size is set on the figure, violinplot itself has
    # no figsize argument
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.violinplot(ax=ax, x='Features',y=y_label,
            hue='Medication Status',
        data=violin_data, palette=[col1,col2], split=True,scale='count',
                        inner='stick')
    # Add significancy stars, just below the upper y-limit
    star_y = 2.1 if zmethod == 'std' else .9
    for n,ft in enumerate(mainFts):
        if ft in sign_feats:
            ax.annotate('*', xy= (n, star_y), fontsize=34, color='red')

    ax.set_title('Main Features during Pre- vs Post-medication States', size=ts)
    if zmethod=='std':
        ax.set_ylim(-2,2.5)
        ax.set_yticks(np.arange(-2,2.5,1))
        ax.set_yticklabels(np.arange(-2,2.5,1),size=ls)
        (xmin,xmax) = ax.get_xlim()
        ax.hlines(np.linspace(-2,2,5), xmin= xmin, xmax= xmax, color='gray', alpha=0.5)
    elif zmethod=='nrm':
        ax.set_ylim(0,1.1)
        ax.set_yticks(np.linspace(0,1,5))
        ax.set_yticklabels(np.linspace(0,1,5),size=ls)
        (xmin,xmax) = ax.get_xlim()
        ax.hlines(np.linspace(0,1,5), xmin= xmin, xmax= xmax, color='gray', alpha=0.5)
    ax.set_xticklabels(['Maximum\nAcceleration','Coefficient\nof Variance',
                        'Root Mean\nSquare','Spectral Power\nbelow 4 Hz'], size=ls)
    ax.set_xlabel('Features', size=ls)
    ax.set_ylabel(y_label, size=ls)
    ax.legend(fontsize=ls, )
    if save:
        fig.savefig(os.path.join(path,'figures/%s_violin_4fts_sig.png' %zmethod), dpi=300)

    

In [146]:
# plotViolins('std', sign_feats , True)



In [147]:
# plotViolins('nrm', sign_feats, True)



## Part 3: Classification of Pre- and Post-med states 

Results and figures are saved to their respective folders in the main path. Each can be created for every model (SV vs RF, group vs individual), and data selection approach (4 vs all features, with vs without activity filtering).




### 3a: Barplots to visualize classification accuracy and auroc for two selected models 

In [232]:
def predictionBars(two_models, model_labels, fig_title, save, save_filename):
    '''
    Barplots of Accuracy (top) and AUROC (bottom) for two models.

    Per metric the first bar-pair shows the mean over patients (error bar:
    sd over patients), followed by one bar-pair per patient. For individual
    models the patient error bars are the '<pt>_<metric>_sd' values; group
    models have no error bars per patient.

    Input:
    - two_models: list of two keys of the global `dictout`
    - model_labels: list of the two legend labels, in the same order
    - fig_title: string for the title of the figure
    - save: Boolean, if True the figure is saved in the 'figures' folder
      of the global `path`
    - save_filename: string, file name of the saved figure
    '''
    # patient ids are the first three characters of the column names
    pts = np.unique([col[:3] for col in dictout[two_models[0]].keys()])
    # settings
    ls= 20 # standard labelsize
    ts= 24 # standard titlesize
    fig,axes = plt.subplots(2,1, figsize=(20,12))
    colors = ['green','salmon'] # colors in respective order
    widths = [-0.3,0.3] # bar widths; with align='edge' the sign sets the side of the tick
    x_labels = ['Mean'] + list(pts)
    for fig_row, metric in enumerate(['Accuracy','AUROC']):
        ax = axes[fig_row]
        # show individual means per outcome-metric
        for d,dct in enumerate(two_models):
            pred = dictout[dct].loc['pred']
            # 'XXX_METRIC' keys: metric name plus 3-character id and underscore
            heights = [pred[key] for key in dictout[dct].keys()
                       if metric in key and len(key) == len(metric)+4]
            if 'group' in dct:
                sds = [0]*len(heights)
            else:
                sds = [pred[key] for key in dictout[dct].keys()
                       if metric in key and 'sd' in key] # all '_sd' keys
            # mean and sd over the patients only: both are calculated before
            # the mean is added as first bar
            bar_heights = [np.mean(heights)] + heights
            bar_errs = [np.std(heights)] + sds
            ax.bar(x=np.arange(len(bar_heights)), height=bar_heights, yerr=bar_errs,
                   width=widths[d], align='edge', color=colors[d], label=model_labels[d])
        # TODO: add significancy stars (with FDR correction) per patient

        # figure design/layout
        ax.tick_params(labelsize=ls)
        ax.set_xticks(np.arange(len(x_labels))) # tick positions first, then their labels
        ax.set_xticklabels(x_labels)
        ax.set_title(metric, fontsize=ts)
        ax.axhline(y=0.5, ls='--', c='k', lw=1.5, label='Chance-level')
        for yline in [0.2,0.4,0.6,0.8]:
            ax.axhline(y=yline, ls='-', c='gray', lw=0.5, )
        for yline in [0.1,0.3,0.7,0.9]:
            ax.axhline(y=yline, ls='--', c='gray', lw=0.3, )
        ax.axhline(y=1, ls='-', c='k', lw=0.5, )
        ax.set_ylim(0, 1.1)
        ax.set_yticks(np.arange(0,1.1,0.2))
        ax.legend(fontsize=ls, loc='upper right', ncol=4)
    fig.suptitle(fig_title, fontsize=ts+6, y=.98)
    fig.tight_layout(h_pad=1, pad=2)

    if save:
        fig.savefig(os.path.join(path,'figures',save_filename), dpi=300)

    plt.show()

    

In [17]:
# Names of all available result tables in dictout
print('List of groups to pick from for comparison:\n')
for model_name in dictout:
    print('%s' % model_name)

List of groups to pick from for comparison:

indiv_SV_allfts_actfilter
indiv_SV_allfts
indiv_SV_4fts_actfilter
indiv_SV_4fts
indiv_RF_allfts_actfilter
indiv_RF_allfts
indiv_RF_4fts_actfilter
indiv_RF_4fts
group_SV_allfts_actfilter
group_SV_allfts
group_SV_4fts_actfilter
group_SV_4fts
group_RF_allfts_actfilter
group_RF_allfts
group_RF_4fts_actfilter
group_RF_4fts


In [365]:
# predictionBars(['group_SV_4fts_actfilter','indiv_SV_allfts_actfilter'],
#                         ['4 feature model (SV group, filtered)',
#                          '103 feature model (SV individual, filtered)'], 
#                'Models based on 4 vs 103 features', 
#                True, 'bars_4_vs_103fts_(best)')



In [366]:
## significance to add
# predictionBars(['indiv_SV_allfts_actfilter','group_SV_allfts_actfilter'], 
#                ['Individual','Group'], 
#                'Individual vs Group Model (both SV and activity filtered)', 
#                False, 'bars_GroupVsIndiv_(SV_actfilter)')





In [328]:
# Brady and tremor diff-scores on the included side, one column per patient
updrsScores.loc[:, ['inclSideHandBrady_diff','inclSideHandTremor_diff']].transpose()

PtId,002,012,013,014,015,017,018,023,024,038,039,043,047,051,054,058,063,065,079,090
inclSideHandBrady_diff,4.0,2.0,6.0,2.0,5.0,4.0,1.0,2.0,4.0,7.0,6.0,4.0,2.0,7.0,6.0,5.0,4.0,1.0,1.0,5.0
inclSideHandTremor_diff,2.0,6.0,9.0,1.0,1.0,0.0,1.0,3.0,4.0,3.0,0.0,0.0,2.0,0.0,1.0,0.0,2.0,0.0,2.0,5.0


### 3b: Equality-plots to compare classification results between two approaches per participant (e.g. individual vs group)

In [169]:
def equality_plots(models,mod_labels,blackwhite,fig_title,save,save_filename):
    '''
    Draw per-participant equality plots comparing two models from dictout.

    Two panels are drawn, AUROC (left) and Accuracy (right). Every participant
    in updrsScores.index is one marker: the score of the first model (row
    'pred' of dictout[model]) is on the X-AXIS, the score of the second model
    on the Y-AXIS. The marker style encodes which model(s) scored
    significantly for that participant on the metric of that panel.

    P-value correction for multiple comparisons is done within this function:
    per model and per metric, the participants' p-values (row 'p' of
    dictout[model]) are tested with Benjamini-Hochberg FDR at alpha = 0.05.

    models = list of two models from dictout.
    mod_labels = list of the figure labels corresponding to the two models.
    blackwhite = Boolean, True for black/white markers, False for coloured ones.
    fig_title = string for title in fig.
    save = Boolean, save_filename = string to store figure-file. The file is
        written to os.path.join(path, 'figures', save_filename) at 150 dpi;
        no extension is appended here.

    Raises ValueError when models or mod_labels does not hold exactly two items.
    '''
    if len(models) != 2 or len(mod_labels) != 2:
        raise ValueError('equality_plots compares exactly two models; got %i models and %i labels'
                         % (len(models), len(mod_labels)))

    metrics = ['AUROC','Accuracy']
    p_tresh = 0.05
    cols = ['%s_%s'%(mod,metr) for mod in ['1','2'] for metr in metrics]
    cols = cols+ [col+'_sign' for col in cols]+['AUROC_sign_comb','Accuracy_sign_comb']
    eq_dat = pd.DataFrame(index=updrsScores.index, columns=cols)

    # (model number, metric) -> boolean array, True where H0 is rejected after FDR
    rejected = {}
    for n,mod in enumerate(models):
        pred_row = dictout[mod].loc['pred']
        p_row = dictout[mod].loc['p']
        for met in metrics:
            keys = ['%s_%s' % (pt,met) for pt in eq_dat.index]
            eq_dat['%i_%s' % (n+1,met)] = [pred_row[key] for key in keys]
            # significance with multicomparison p-correction; the first value
            # returned by multipletests is the boolean reject mask
            ps = [p_row[key] for key in keys]
            reject,_,_,_ = multipletests(ps,alpha=p_tresh, method='fdr_bh')
            rejected[(n+1,met)] = np.asarray(reject, dtype=bool)
            eq_dat['%i_%s_sign' % (n+1,met)] = rejected[(n+1,met)].astype(int)

    # combined sign: 1 = only model 1, 2 = only model 2, 3 = neither, 4 = both
    for met in metrics:
        sig1, sig2 = rejected[(1,met)], rejected[(2,met)]
        eq_dat['%s_sign_comb' % met] = np.where(sig1 & sig2, 4,
                                                np.where(sig1, 1, np.where(sig2, 2, 3)))

    # One entry per significance class: (combined code, legend label, marker style)
    if blackwhite:
        # triangle = first model significant, circle = first model not significant
        # filled = second model significant, hollow = second model not significant
        marker_specs = [
            (4, 'Both significant', dict(c='k', marker='^')),
            (1, 'Only %s significant' % mod_labels[0],
             dict(marker='^', edgecolors='k', facecolors='none')),
            (2, 'Only %s significant' % mod_labels[1], dict(c='k', marker='o')),
            (3, 'None significant', dict(marker='o', edgecolors='k', facecolors='none')),
        ]
    else:
        marker_specs = [
            (4, 'Both significant', dict(color='green', marker='o')),
            (1, 'Only %s \nsignificant' % mod_labels[0], dict(color='darkblue', marker='o')),
            (2, 'Only %s \nsignificant' % mod_labels[1], dict(color='goldenrod', marker='o')),
            (3, 'None significant', dict(edgecolors='k', facecolors='lightgray', marker='o')),
        ]

    fig, axes = plt.subplots(1,len(metrics),figsize=(26,12))
    dotSize = 250
    fs=30
    ts=36
    for n,metric in enumerate(metrics):
        x_ax = '1_%s' % metric
        y_ax = '2_%s' % metric
        # Select on the significance class of THIS panel's metric
        comb = eq_dat['%s_sign_comb' % metric]
        for code,label,style in marker_specs:
            sel = eq_dat[comb == code]
            axes[n].scatter(sel[x_ax], sel[y_ax], s=dotSize, lw=2, label=label, **style)

        # plot equality line
        xStart, xEnd, yStart, yEnd = 0.2, 1.0, 0.2, 1.0
        axes[n].axis([xStart, xEnd, yStart, yEnd])
        axes[n].plot(np.linspace(xStart,xEnd,10),np.linspace(yStart,yEnd,10), c='k',
                     ls='dotted',lw=5, label='Equality of models',alpha=0.7)
        # details for main figure
        axes[n].hlines([0.4,0.6,0.8], xmin=0.2, xmax=1, color='gray', alpha=0.5)
        axes[n].vlines([0.4,0.6,0.8], ymin=0.2, ymax=1, color='gray', alpha=0.5)
        axes[n].set_xlabel('%s score' % mod_labels[0], size=fs)
        axes[n].set_ylabel('%s score' % mod_labels[1], size=fs)
        axes[n].set_title('%s' % (metric), size=ts)
        axes[n].tick_params(labelsize=fs,direction='out', length=6, width=2, colors='k',
                            grid_color='r', grid_alpha=0.5)
    # A single legend, attached to the last panel and placed outside of it;
    # both panels carry the same set of labels.
    plt.legend(loc='upper left',fontsize=24, bbox_to_anchor=(1.05, 1),)
    plt.suptitle(fig_title, fontsize=ts+6, y=.98)
    plt.tight_layout(h_pad=1, pad=2)
    if save:
        plt.savefig(os.path.join(path,'figures',save_filename), dpi=150)
    plt.show()


In [364]:
# # Best models based on 4 vs 103 features
# equality_plots(['group_SV_4fts_actfilter','indiv_SV_allfts_actfilter'],
#                         ['4 feature model \n(SV group, filtered)',
#                          '103 feature model \n(SV individual, filtered)'],
#                         False, 'Models with 4 vs 103 features (both group SV, activity filtered)',
#                         True,'EQplot_4_vs_103fts_(best)(fdr)')



In [363]:
# # Best individual vs best group model
# equality_plots(['indiv_SV_allfts_actfilter','group_SV_allfts_actfilter'],
#                         ['Individual model','Group model'],
#                         False, 'Individual vs Group models (both SV, activity filtered)',
#                         True,'EQplot_Indiv_vs_Group_SV_actfilter(fdr)')


In [362]:
# Best model without vs with filtering
# equality_plots(['indiv_SV_allfts','indiv_SV_allfts_actfilter'],
#                         ['Not filtered\n(individual SV)','Activity filtered\n(individual SV)'],
#                         False, 'Best model without vs best model with activity filtering',
#                         True,'EQplot_W_vs_WO_actfilter_SV_indiv(fdr)')



## Part 4: Correlating predictive outcomes with clinical scores

In [358]:
## correlations updrs sub-scores with outcome, new datasplitting analyses

def plot_clin_corr(models,mod_labels,save,save_filename,fig_title=None):
    '''
    Bar plot of Spearman correlations between clinical subscores and the
    predictive performance of one or more models.

    For every model in models and every metric (AUROC, Accuracy), the
    per-participant score (row 'pred' of dictout[model]) is correlated with
    each of seven '%diff' subscore columns of updrsScores. Bars are grouped
    per model/metric combination, one bar per subscore.

    models: list of one or more models (keys of dictout) which are compared
        with the subscores.
    mod_labels: list of figure labels, one per entry of models.
    save: Boolean, save_filename: string; the figure is written to
        os.path.join(path, 'figures', save_filename + '.png') at 300 dpi.
    fig_title: optional figure title; when None the original fixed title
        (which mentions activity filtering) is used.

    Raises ValueError when models and mod_labels differ in length.

    Note: this function sets plt.rcParams["axes.grid"] = False, which also
    affects figures created afterwards.
    '''
    if len(models) != len(mod_labels):
        raise ValueError('plot_clin_corr needs one label per model; got %i models and %i labels'
                         % (len(models), len(mod_labels)))
    if fig_title is None:
        fig_title = 'Clinical MDS-UPDRS III subscores vs. Predictive performance with activity filtering'

    subscores = ['inclSideHandBrady_%diff','inclSideHandTremor_%diff',
                     'bradyBody_%diff','legs_%diff','gait_%diff',
                     'posture_%diff','facial_%diff' ]
    subscorelabels = ['Hand brady','Hand tremor','Body brady','Legs','Gait',
                     'Posture','Face' ]
    metrics = ['AUROC', 'Accuracy']
    corr_dat = updrsScores[subscores].copy() # subscores per pt, indexed like updrsScores
    for n,mod in enumerate(models):
        pred_row = dictout[mod].loc['pred']
        for met in metrics: # import metric scores, number 1, 2, ...
            # Iterate the frame's own index so scores line up row by row with
            # the subscores (no dependency on a separately defined patient list)
            corr_dat['%i_%s'%(n+1,met)] = [pred_row['%s_%s' % (pt,met)] for pt in corr_dat.index]

    plt.figure(figsize=(8*len(models),8))
    corr_list = {}
    for clinVar in subscores:
        # per subscore: r-values ordered as model 1 AUROC, model 1 Accuracy, model 2 AUROC, ...
        corr_list[clinVar] = []
        for n,mod in enumerate(models):
            for met in metrics:
                r,p = spearmanr(corr_dat[clinVar], corr_dat['%i_%s'%(n+1,met)]) # calculate corr
                corr_list[clinVar].append(r) # only r is plotted, p is not used

    xgroups = []
    for modlab in mod_labels:
        xgroups.extend(['%s\nAUROC'%modlab,'%s\nAccuracy'%modlab])
    x = np.arange(len(xgroups))
    width = 0.1
    lw = 0.2
    cs = 10
    for n,clinVar in enumerate(subscores):
        # bars of one group sit side by side, shifted by one bar width per subscore
        plt.bar(x+ width*n, corr_list[clinVar], label=subscorelabels[n],
                capsize=cs,linewidth=lw, width=width, )

    plt.ylabel('Spearman Correlation R', size=24)
    plt.title(fig_title, size=28)

    # Centre each tick under its group of bars; derived from x so the number
    # of ticks always equals the number of labels
    plt.xticks(x + width*(len(subscores)-1)/2, xgroups, rotation = 45, fontsize =24)
    plt.yticks(fontsize=20)
    plt.ylim(-.5,0.5)
    plt.legend(loc='lower center',ncol=4,prop={'size': 24})
    plt.rcParams["axes.grid"] = False
    for h in np.arange(-0.3,0.5,0.1):
        plt.axhline(y=h, zorder=0, color='lightgray')
    plt.tight_layout()
    if save:
        plt.savefig(os.path.join(path,'figures','%s.png' % save_filename), dpi=300)

    plt.show()





In [361]:
# Paper FIG5
# plot_clin_corr(['indiv_SV_allfts_actfilter','group_SV_allfts_actfilter'],
#                ['Individual SV','Group SV'],
#                True,'subscores_vs_performance_SVs_filtered')


In [360]:
# ## SUPPL FIG 6 (pm adjust title in function for w or w/o filter)
# plot_clin_corr(['indiv_SV_allfts_actfilter','indiv_RF_allfts_actfilter',
#  'group_SV_allfts_actfilter','group_RF_allfts_actfilter'],
# ['Individual SV','Individual RF',
#  'Group SV','Group RF'],
#              True,'subscores_vs_performance_actfilter')



## Part 5: Visualizing the effect of the number of training patients on the group model


In [320]:
def plot_training_pts(actfilter,save):
    '''
    Plot mean AUROC and mean Accuracy against the number of training
    patients (1 to 19) for the SV and RF classifiers, in a single axis.

    Per classifier a results file is read from os.path.join(path, 'results'):
    'preds_<cls>_nr_train_pts_actfilter.csv' when actfilter is truthy,
    otherwise 'preds_<cls>_nr_train_pts.csv'. Its columns
    '<metric>_mean_n<k>' are averaged over rows for every k.

    actfilter: Boolean, selects the input files, the title and the output name.
    save: Boolean, when truthy the figure is written as a 300 dpi png to
        os.path.join(path, 'figures').
    '''
    fig,ax=plt.subplots(1,1,figsize=(16,8))
    metric_names = ['AUROC','Accuracy']
    line_colors = {'SV': 'purple', 'RF': 'orange'}      # one colour per classifier
    line_styles = {'AUROC': '-', 'Accuracy': '--'}      # one line style per metric
    if actfilter:
        csv_template = 'preds_%s_nr_train_pts_actfilter.csv'
    else:
        csv_template = 'preds_%s_nr_train_pts.csv'

    for classifier in ['SV','RF']:
        results = pd.read_csv(os.path.join(path,'results',csv_template % classifier), index_col=0)
        summary = pd.DataFrame(index=np.arange(1,20),
                               columns=['AUROC','AUROC_sd','Accuracy','Accuracy_sd'])
        for n_train in summary.index:
            for metric in metric_names:
                # mean and std over the rows of the results file
                per_row = results['%s_mean_n%i'%(metric,n_train)]
                summary.at[n_train,metric] = np.mean(per_row)
                summary.at[n_train,metric+'_sd'] = np.std(per_row)
        for metric in metric_names:
            # the '_sd' columns are computed above but not drawn
            ax.plot(summary.index, summary[metric],
                    label='Mean %s (%s)' % (metric,classifier),
                    linestyle=line_styles[metric], c=line_colors[classifier], lw=2)

    # config figure
    ax.legend(fontsize=20)
    ax.set_xlabel('Number of patients used for training', fontsize=20)
    ax.set_xticks(np.arange(1,20,2))
    ax.tick_params(labelsize=20)
    ax.grid(True, axis='both', which='major')
    if actfilter:
        ax.set_title('Effect of number of training patients in group models, with activity filter', fontsize=24)
    else:
        ax.set_title('Effect of number of training patients in group models', fontsize=24)

    plt.tight_layout()
    if save:
        if actfilter:
            figure_name = 'Effect_numTrainingPtsGroupModels_actfilter.png'
        else:
            figure_name = 'Effect_numTrainingPtsGroupModels.png'
        plt.savefig(os.path.join(path,'figures',figure_name), dpi=300)
    plt.show()




In [331]:
# plot_training_pts(actfilter=False,save=True)
# plot_training_pts(actfilter=True,save=True)