### Systematic performance comparisons

Compare the following for recalibrating T5 cursor decoders:
- mean recalibration
- subspace realignment (optimized settings from `optimize_SubspaceRealignment.ipynb`)
- HMM (optimized settings from `optimize_vanillaHMM.ipynb`)

In [20]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
import pandas as pd
from copy import deepcopy
import glob
import sys

[sys.path.append(f) for f in glob.glob('../utils/*')]
import preprocess, sweep_utils
from plotting_utils import figSize
from lineplots import plotsd
from session_utils import *
from recalibration_utils import *
from click_utils import *

import sweep_utils


# Root directory of the sweep outputs; result files for each method are
# globbed from DATA_DIR/<method>/*/*.
DATA_DIR = '/oak/stanford/groups/shenoy/gwilson/nonstationarities/T5/'

# Methods to compare, and (index-aligned) the hyperparameter fields requested
# from each method's result files.
methods  = ['HMM', 'Stabilizer', 'HMM-Stabilizer']
params   = [['kappa', 'inflection', 'exp'], ['method', 'n_components', 'B', 'thresh'], []]

# Hyperparameter values to keep for each method, listed in the same order as
# that method's entry in `params`. Methods missing from this table are not
# filtered.
optimal_values = {
    'HMM'        : [2, 50, 0.1],
    'Stabilizer' : ['FactorAnalysis', 8, 180, 0.01],
}

# Fields requested for every method in addition to its hyperparameters.
summary_fields = ['R2_score', 'pearson_r', 'days_apart', 'file']

# NOTE(review): the output recorded for this cell is
# "TypeError: unhashable type: 'list'"; its origin is not visible in this
# cell and presumably lies inside `sweep_utils` -- confirm.
scores = list()
for m, p in zip(methods, params):
    files     = glob.glob(DATA_DIR + m + '/*/*')
    scores_df = sweep_utils.getSummaryDataFrame(files, p + summary_fields)

    if m in optimal_values:
        opt_dict  = dict(zip(p, optimal_values[m]))
        scores_df = sweep_utils.get_subsetDF(scores_df, opt_dict)

    scores.append(scores_df)
    print(m, ' results loaded.')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


TypeError: unhashable type: 'list'

In [21]:
import torch

ModuleNotFoundError: No module named 'torch'

Now look at performance as a function of time between reference and new session:

In [None]:
from sklearn.ensemble import BaggingRegressor
# Imported explicitly: this cell previously relied on one of the wildcard
# imports at the top of the notebook to provide `LinearRegression`.
from sklearn.linear_model import LinearRegression

n_bootstraps = 500

# NOTE(review): `diffs` is not defined in any visible cell, and `scores` is
# indexed here as a 2-D array (`scores[i, :]`) although the loading cell
# builds it as a list of DataFrames. This cell depends on kernel state that
# is not shown -- confirm before a Restart & Run All.
bs_models  = list()
opt_scores = list()   # never filled in this cell
for i in range(5):
    # Bagged linear regression of row i of `scores` on `diffs`; the fitted
    # sub-estimators are kept so each bootstrap fit can be drawn on its own.
    bootstrap_lr = BaggingRegressor(LinearRegression(), n_estimators = n_bootstraps, bootstrap = True, random_state = 42)
    bootstrap_lr.fit(diffs[:, np.newaxis], scores[i, :])
    bs_models.append(bootstrap_lr.estimators_)

In [None]:
figSize(7, 15)

colors = ['k', 'b', 'orange', 'r', 'orange']
labels = ['None', 'Means', 'Full', 'Subspace', 'HMM']

# Only these rows of `scores` / entries of `bs_models` are drawn.
shown_rows   = (0, 1, 4)
# `diffs` as a single-column 2-D array, the input shape passed to `predict`.
diffs_column = diffs[:, np.newaxis]

for row, estimators in enumerate(bs_models):
    if row not in shown_rows:
        continue

    # One faint line per bootstrap fit, then the observed points on top.
    for fit in estimators:
        plt.plot(diffs, fit.predict(diffs_column), alpha = 0.01, color = colors[row])
    plt.scatter(diffs, scores[row, :], label = labels[row], color = colors[row])

plt.xlabel('Days between sessions')
plt.ylabel('R^2 (holdout)')
plt.title('Recalibration R^2 as a function of time between sessions', fontweight = 'bold')
plt.legend()

In [None]:
# Display row 4 of `scores` (the row labelled 'HMM' in the plotting cell's `labels`).
scores[4, :]

### Check out badly performing days

In [None]:
# Imported explicitly rather than relying on the notebook's wildcard imports.
from sklearn.linear_model import LinearRegression

figSize(15, 15)

# Session pairs less than 40 days apart whose row-4 score is below 0.1
# ("bad") or above 0.3 ("good").
bad_days      = np.where(np.logical_and(diffs < 40, scores[4, :] < 0.1))[0]
good_days     = np.where(np.logical_and(diffs < 40, scores[4, :] > 0.3))[0]

# Side length of the smallest square grid with one slot per bad day.
grid_side     = np.ceil(np.sqrt(len(bad_days))).astype('int')
width, height = grid_side, grid_side

# FR_diffs[0] / FR_diffs[1]: per-feature mean of the training data for the
# first / second session of each consecutive pair.
# NOTE(review): 192 is presumably the number of recorded channels -- confirm.
FR_diffs      = np.zeros((2,  192, len(files) - 1, ))
decoder_preds = list()

# NOTE(review): `diffs`, `task` and `train_frac` are not defined in any
# visible cell; this cell depends on earlier kernel state.
for i in range(len(files) - 1):
    dayA = DataStruct(files[i])
    dayB = DataStruct(files[i + 1])

    if np.any(dayA.trialType == task) and np.any(dayB.trialType == task):
        # Both sessions contain the requested task: use it for both. The
        # previous code tested for `task` but then hard-coded 'cursor'.
        dayA_task, dayB_task = task, task
    else:
        dayA_task = np.unique(dayA.trialType)[0]
        dayB_task = np.unique(dayB.trialType)[0]

    # Train/test splits for both sessions.
    Atrain_x, Atest_x, Atrain_y, Atest_y = getTrainTest(dayA, train_frac = train_frac, task = dayA_task, return_flattened = True)
    Btrain_x, Btest_x, Btrain_y, Btest_y = getTrainTest(dayB, train_frac = train_frac, task = dayB_task, return_flattened = True)

    FR_diffs[0, :, i] = Atrain_x.mean(axis = 0)
    FR_diffs[1, :, i] = Btrain_x.mean(axis = 0)

    # Fit the day-A decoder on mean-centered features with no intercept, then
    # apply it to day-B test features centered by their own mean.
    lm_a              = LinearRegression(fit_intercept = False).fit(Atrain_x - Atrain_x.mean(axis = 0), Atrain_y)
    preds             = lm_a.predict(Btest_x - Btest_x.mean(axis = 0))

    # Stored as [day-B test targets, day-A decoder predictions].
    decoder_preds.append([Btest_y, preds])
    

In [None]:
from sklearn.metrics import r2_score
from scipy.stats import spearmanr

subplt_size = 5
plt_days    = bad_days   # session pairs to plot, one figure row per pair

#----------------------------------
n_rows = len(plt_days)
figSize(subplt_size * n_rows, subplt_size * 3)

coord_type = ['X', 'Y']
for j, i in enumerate(plt_days):
    # Column 1: per-feature mean firing rate, reference day vs. new day.
    plt.subplot(n_rows, 3, 1 + (j * 3))
    x    = FR_diffs[0, :, i]
    y    = FR_diffs[1, :, i]
    corr = spearmanr(x, y)[0]

    plt.scatter(x, y, color = 'b')
    maxval = max(x.max(), y.max())
    plt.xlim([0, maxval + 10])
    plt.ylim([0, maxval + 10])
    plt.xlabel('Mean FRs (reference)')
    plt.ylabel('Mean FRs (new)')
    # Identity line: points on it have the same mean on both days.
    plt.plot(plt.xlim(), plt.xlim(), linestyle = '--', color = 'k')
    plt.title('Mean FRs: r = ' + str(np.round(corr, 3)))

    # Columns 2-3: day-B targets (black) vs. day-A decoder predictions (red)
    # for each output coordinate.
    for coord in range(2):
        x    = decoder_preds[i][0][:, coord]
        y    = decoder_preds[i][1][:, coord]
        corr = r2_score(x, y)

        # The grid must use the same row count as the first column; this
        # previously used len(bad_days), which only matched while
        # plt_days was bad_days.
        plt.subplot(n_rows, 3, coord + 2 + (j * 3))
        # Fixed example window of samples 4000-4999 (shorter or empty if the
        # session has fewer samples).
        plt.plot(x[4000:5000], color = 'k')
        plt.plot(y[4000:5000], color = 'r')

        plt.xlabel('Time (example period)')
        plt.ylabel('Coefficients (new)')
        plt.title(coord_type[coord] + ' predictions: R^2 = ' + str(np.round(corr, 3)))

plt.suptitle('Changes between reference and new day', fontweight = 'bold')
# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.97])


In [None]:
from sklearn.linear_model import TheilSenRegressor
from resample import bootstrap_LinearRegression
figSize(10, 20)

# Session pairs fewer than 4 days apart are highlighted in red on both panels.
plt_days = np.where(diffs < 4)[0]

#-----------------------------------
# NOTE(review): `dayB_HMMscores`, `opt_D` and `opt_B` are not defined in any
# visible cell; this cell depends on earlier kernel state.
n_pairs    = len(files) - 1
n_boot     = 500
hmm_scores = dayB_HMMscores[opt_D, opt_B, :]

# Absolute value of the across-feature mean change, one value per session pair.
FR_changes = np.abs((FR_diffs[1, :, :] - FR_diffs[0, :, :]).mean(axis = 0))
FR_corr    = [spearmanr(FR_diffs[1, :, k], FR_diffs[0, :, k])[0] for k in range(n_pairs)]

# ---- left panel: HMM score vs. mean firing-rate change ----
plt.subplot(1, 2, 1)
plt.scatter(FR_changes, hmm_scores)
for day in plt_days:
    plt.scatter(FR_changes[day], hmm_scores[day], color = 'r')

plt.ylim([-1, 1])
plt.xlabel('Mean FR difference')
plt.ylabel('Recalibration performance (R^2)')
plt.title('HMM performance vs. mean FR difference', fontweight = 'bold')

# Bootstrapped Theil-Sen regression: faint lines for the individual
# bootstrap fits, one opaque line for the returned model.
lm, means, coefs = bootstrap_LinearRegression(FR_changes[:, np.newaxis], hmm_scores, regressor = TheilSenRegressor(), n_bootstraps = n_boot, random_state = 42)

for b in range(n_boot):
    plt.plot(FR_changes, means[b, :] + FR_changes * coefs[b, :], alpha = 0.01, color = 'b')
plt.plot(FR_changes, lm.predict(FR_changes[:, np.newaxis]), alpha = 1, color = 'b')

# ---- right panel: HMM score vs. non-recalibrated decoder R^2 ----
plt.subplot(1, 2, 2)
decoder_R2 = np.asarray([r2_score(decoder_preds[k][0], decoder_preds[k][1]) for k in range(n_pairs)])

plt.scatter(decoder_R2, hmm_scores)
for day in plt_days:
    plt.scatter(decoder_R2[day], hmm_scores[day], color = 'r')

plt.ylim([-1, 1])
plt.xlim([-1, 1])
plt.xlabel('Reference decoder performance on new day (R^2)')
plt.ylabel('Recalibration performance (R^2)')
plt.title('HMM performance vs. non-recalibrated decoder performance', fontweight = 'bold')

lm, means, coefs = bootstrap_LinearRegression(decoder_R2[:, np.newaxis], hmm_scores, regressor = TheilSenRegressor(), n_bootstraps = n_boot, random_state = 42)

# Only the returned model is drawn here, across the current x-limits; the
# individual bootstrap fits are not plotted on this panel.
x_span = np.asarray(plt.xlim())
plt.plot(x_span, lm.predict(x_span[:, np.newaxis]), alpha = 1, color = 'b')

In [None]:
# Display the indices of the session pairs selected as `bad_days`.
bad_days

In [None]:
# For every bad session pair, print the date of the file at that index
# followed by the date of the next file.
for day in bad_days:
    first_session  = DataStruct(files[day])
    second_session = DataStruct(files[day + 1])
    print(first_session.date, '  ', second_session.date)

    