In [3]:
%load_ext autoreload
%autoreload 2

import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
from copy import deepcopy
import glob, sys


[sys.path.append(f) for f in glob.glob('../utils/*')]
from preprocess import DataStruct
from plotting_utils import figSize
from lineplots import plotsd
from hmm_utils import *
from hmm import *
from session_utils import *
from recalibration_utils import *

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import FactorAnalysis, PCA

min_nblocks = 3       # min number of blocks for a session to be include
max_ndays   = 30      # accept all pairs of sessions regardless of time between
#min_R2      = 0.1      # subselect days with good decoder transfer performance 

data_dir    = '/oak/stanford/groups/shenoy/gwilson/nonstationarities/'
participant = 'T5/historical/'
save_path   = '/oak/stanford/groups/shenoy/gwilson/nonstationarities/T5/train/'
files       = loadDataset(data_dir, participant)


# now preprocess by getting "good" sessions and removing those with low block counts:
sessions_check = np.load('../utils/misc_data/NewSessions_check.npy', allow_pickle = True).item()
files          = get_Sessions(files, min_nblocks, manually_remove = sessions_check['bad_days'])

init_pairs    = get_SessionPairs(files, max_ndays = max_ndays)
pairs         = init_pairs
n_pairs       = len(pairs)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
from hmm import *
from hmm_utils import prep_HMMData, get_DiscreteTargetGrid, train_HMMRecalibrate
from sklearn.metrics import r2_score
import itertools


# general settings:
diffs           = list()
task            = None
train_size      = 0.67
sigma           = 2
gridSize        = 20

for i, (A_file, B_file) in enumerate(pairs):
    dayA = DataStruct(A_file, alignScreens = True, causal_filter = 2)
    dayB = DataStruct(B_file, alignScreens = True, causal_filter = 2)
    

    #dayA_blocks             = [sessions_check[A_file] if A_file in sessions_check.keys() else None][0]
    #dayB_blocks             = [sessions_check[B_file] if B_file in sessions_check.keys() else None][0] 
    #dayA_task, dayB_task, _ = getPairTasks(dayA, dayB, task = task)
    dayA_blocks, dayB_blocks = None, None
    dayA_task, dayB_task    = None, None

    # obtain features and cursorError targets:
    
    Atrain_x, Atest_x, Atrain_y, Atest_y  = getTrainTest(dayA, train_size = train_size, sigma = None, blocks = dayA_blocks, task = dayA_task, returnFlattened = True)    
    Atrain_x, Atest_x  = get_BlockwiseMeanSubtracted(Atrain_x, Atest_x, concatenate = True)
    Atrain_y           = np.concatenate(Atrain_y)
    Atest_y            = np.concatenate(Atest_y)

    Btrain_x, Btest_x, Btrain_y, Btest_y, B_cursorPos, _  = getTrainTest(dayB, train_size = train_size, sigma = sigma, blocks = dayB_blocks, task = dayB_task, 
                                                                         returnFlattened = True, returnCursor = True)    

    Btrain_x, Btest_x  = get_BlockwiseMeanSubtracted(Btrain_x, Btest_x, concatenate = True)
    Btrain_y           = np.concatenate(Btrain_y)
    Btest_y            = np.concatenate(Btest_y)
    B_cursorPos        = np.concatenate(B_cursorPos)
    targetPos          = Btrain_y + B_cursorPos

    A_decoder_score, A_decoder = traintest_DecoderSupervised([Atrain_x], [Atrain_x], [Atrain_y], [Atrain_y], meanRecal = False)    

    # add smoothing and session-specific information
    pair_data = dict()
    pair_data['A_file']     = A_file
    pair_data['B_file']     = B_file
    pair_data['days_apart'] = daysBetween(dayA.date, dayB.date)
    pair_data['task']       = None
    pair_data['train_size'] = 0.67
    pair_data['smoothing']  = 2
    
    pair_data['A_decoder_score'] = A_decoder_score
    pair_data['A_decoder']       = A_decoder
    
    pair_data['A_train_neural']  = Atrain_x
    pair_data['A_test_neural']   = Atest_x
    pair_data['A_train_targvec'] = Atrain_y
    pair_data['A_test_targvec']  = Atest_y
    
    pair_data['B_train_neural']  = Btrain_x
    pair_data['B_test_neural']   = Btest_x
    pair_data['B_train_targvec'] = Btrain_y
    pair_data['B_test_targvec']  = Btest_y
    pair_data['B_train_cursor']  = B_cursorPos
    pair_data['B_targLocs']      = get_DiscreteTargetGrid(dayB, gridSize = gridSize, task = dayB_task)
    
    
    save_fname = dayA.date + '_to_' + dayB.date + '.npy'
    np.save(save_path + save_fname, pair_data)
    
    if (i + 1) % int(np.round(len(pairs) / 10)):
        print(np.round((i + 1) * 100 / len(pairs), 1), '% complete')



0.7 % complete
1.5 % complete
2.2 % complete
2.9 % complete
3.7 % complete
4.4 % complete
5.1 % complete
5.9 % complete
6.6 % complete
7.4 % complete
8.1 % complete
8.8 % complete
9.6 % complete
11.0 % complete
11.8 % complete
12.5 % complete
13.2 % complete
14.0 % complete
14.7 % complete
15.4 % complete
16.2 % complete
16.9 % complete
17.6 % complete
18.4 % complete
19.1 % complete
19.9 % complete
21.3 % complete
22.1 % complete
22.8 % complete
23.5 % complete
24.3 % complete
25.0 % complete
25.7 % complete
26.5 % complete
27.2 % complete
27.9 % complete
28.7 % complete
29.4 % complete
30.1 % complete
31.6 % complete
32.4 % complete
33.1 % complete
33.8 % complete
34.6 % complete
35.3 % complete
36.0 % complete
36.8 % complete
37.5 % complete
38.2 % complete
39.0 % complete
39.7 % complete
40.4 % complete
41.9 % complete
42.6 % complete
43.4 % complete
44.1 % complete
44.9 % complete
45.6 % complete
46.3 % complete
47.1 % complete
47.8 % complete
48.5 % complete
49.3 % complete
50.0 