Code for examining how HMM performs under repeated recalibration


In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


import sys, glob
# Add every entry under ../utils to the module search path (the imports that
# follow presumably live there). extend() does this in one call instead of
# building a throwaway list of None values.
sys.path.extend(glob.glob('../utils/*'))
from plotting_utils import figSize
from hmm import HMMRecalibration
import hmm_utils
from RTI_utils import RTI
import simulation_utils 
from simulation import simulateBCIFitts

# Seed NumPy's global random number generator for a reproducible result.
np.random.seed(1)

# Absolute, machine-specific locations; edit these when running elsewhere.
# fig_path: prefix that the plt.savefig() output path is built from.
# data_dir: directory that is globbed for the saved result files loaded below.
fig_path = '/home/users/ghwilson/projects/nonstationarities/figures/'
data_dir = '/oak/stanford/groups/shenoy/ghwilson/nonstationarities/simulator/efficiency/'

In [None]:
# Method names; position k in this list corresponds to column k of each
# file's 'scores' array.
labels = ['supervised', 'HMM', 'Click HMM', 'RTI']

# One record per (result file, method) pair.
dicts = []

for path in glob.glob(data_dir + '/*'):
    # Each file stores an object array whose first element is a dict of results.
    results = np.load(path, allow_pickle = True)[0]

    # Keep only the last row of 'scores' for every method.
    dicts.extend(
        {
            'ttt'       : results['scores'][-1, col],
            'method'    : method,
            'nSimSteps' : results['nSimSteps'],
        }
        for col, method in enumerate(labels)
    )

df = pd.DataFrame(dicts)

In [6]:
# Sanity check: number of records collected (one per result file per method).
len(dicts)

624

## Vanilla HMM vs RTI comparisons

In [None]:
# Strip plot of the final 'ttt' score of each method at every simulation
# length, overlaid with one median line per group.
figSize(5, 15)

colors = ['g', 'cyan', 'orange']
# zip() stops at the shorter input, so only the first len(colors) methods
# receive a colour.
cmap   = dict(zip(labels, colors))

# Restrict the data to the methods that have a palette entry.
# NOTE(review): the section header mentions RTI, but RTI gets no colour from
# the zip above -- confirm which methods this figure is meant to compare.
plot_methods = list(cmap)
scores_df    = df[df['method'].isin(plot_methods)]

sns.stripplot(data = scores_df, x = 'nSimSteps', y = 'ttt', 
              hue = 'method', alpha = 0.8, palette = cmap, dodge = True)

ax = plt.gca()

# Only the median line of each box is drawn: box, whiskers, caps and fliers
# are all switched off.
sns.boxplot(medianprops={'ls': '-', 'lw': 2, 'alpha' : 0.6,}, whiskerprops={'visible': False},
            zorder=1, x="nSimSteps", y="ttt", hue='method', data=scores_df, 
            showfliers=False, showbox=False, showcaps=False, ax=ax)

#plt.yscale('log')
# Two plots share these axes, so each method can show up twice in the legend;
# drop the first batch of entries so every method is listed once.
# Distinct variable names keep the `labels` list of method names intact for
# later cells.
legend_handles, legend_labels = ax.get_legend_handles_labels()
n_methods = scores_df['method'].nunique()
plt.legend(legend_handles[n_methods:], legend_labels[n_methods:], frameon=False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.savefig(fig_path + 'simulator/vanillaHMM_vs_clickHMM_efficiency', format = 'pdf')
plt.show()

In [None]:
import scipy.stats

# Pairwise Wilcoxon signed-rank tests between methods at each simulation length.
# The method names are read from the plotted data itself (unique, in order of
# first appearance), so the loop never compares a method against itself and
# never selects a method that has no rows in scores_df.
methods = list(scores_df['method'].unique())

for nSteps in [2000, 4000, 6000, 8000, 10000]:
    subset = scores_df[scores_df['nSimSteps'] == nSteps]
    
    # NOTE(review): 0.02 is presumably the duration of one simulation step in
    # seconds -- confirm against the simulator settings.
    print(' \n', 'Time (sec): ', 0.02 * nSteps)
    for i, method1 in enumerate(methods):
        # Each unordered pair is visited once, later method first.
        for method2 in methods[:i]:
            # Plain arrays so the paired test works by position, not by the
            # differing DataFrame index labels of the two selections.
            # NOTE(review): pairing assumes every result file contributed one
            # row per method, in the same file order -- verify.
            scores1 = subset.loc[subset['method'] == method1, 'ttt'].to_numpy()
            scores2 = subset.loc[subset['method'] == method2, 'ttt'].to_numpy()
            stat = scipy.stats.wilcoxon(scores1, scores2)
            print(f'{method1} vs {method2}: p = ', stat)

## Click HMM vs HMM comparisons