In [45]:
# all imports
import pickle

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
from scipy.stats import zscore
from sklearn.linear_model import LinearRegression

# from sklearn.linear_model import Ridge
# from sklearn.model_selection import GridSearchCV
# from sklearn.decomposition import PCA
# import matplotlib.pyplot as plt


In [46]:
# Load the recording session. The pickle holds a dict; its 'info' entry is a
# human-readable description of every other key.
# NOTE: pickle can execute arbitrary code while loading -- only open trusted files.
# The context manager closes the file handle as soon as loading finishes.
with open('data/dataset.pickle', 'rb') as dataset_file:
    data = pickle.load(dataset_file)
print(data['info'])

responses: (11983 neurons x 7018 timebins) responses for each neuron at each time 
run_speed: (7018 timebins x 1) run speed for mouse at each time 
neuron_pos: (11983 neurons x 3) xyz position for each neuron 
pupil_area: (7018 timebins x 1) area of pupil for mouse at each time 
pupil_pos: (7018 timebins x 2) xy position of pupil for mouse at each time 
behavior_embedding: (7018 timebins x 1000) the behavior embedded in a vector size 1000 at each time 
movie: (7018 timebins x 240 x 320) the behavior movie at each time (corresponding to the embedding) 
movie_mask: (240 x 320 x 1) a mask to generate the movie, not important 
info: this text 



In [49]:
# Z-score every neuron across time (axis=1 of the neurons x timebins matrix),
# then transpose so that rows are time bins and columns are neurons.
zscored_X = zscore(data['responses'], axis=1).T

print(zscored_X.shape)

# Window lengths, in time bins, for which a feature set is built.
new_bins = [1, 2, 4, 8, 16, 60]

# All window lengths are truncated to the same number of samples, and the
# longest window is the limiting one: with 100 time points and a 60-bin window
# there are only 100 - 60 + 1 = 41 complete windows.
MAX_POINTS = zscored_X.shape[0] - np.max(new_bins) + 1

n_neurons = zscored_X.shape[1]
new_data = []
for bin_size in new_bins:  # named `bin_size` so the builtin `bin` is not shadowed
    # Windowing over the full neuron axis leaves a singleton axis 1; dropping it
    # gives (n_windows, bin_size, n_neurons). Window i covers time bins
    # i .. i + bin_size - 1. This is a strided view, so no data is copied here.
    binned = sliding_window_view(zscored_X, window_shape=(bin_size, n_neurons))[:, 0]
    new_data.append(binned[:MAX_POINTS])

# sanity check
for binned_data in new_data:
    print(f'{binned_data.shape} ', end='')
    



(7018, 11983)
(7015, 4, 11983) 

In [50]:
# Fit one ordinary least-squares model per window length and report R^2 on a
# chronological split: the first TRAIN_TEST_SPLIT samples train, the rest test.
TRAIN_TEST_SPLIT = 6000

# Both the training slice and the held-out slice must be non-empty.
assert 0 < TRAIN_TEST_SPLIT < MAX_POINTS, 'TRAIN_TEST_SPLIT leaves an empty train or test set'

regressions = [LinearRegression() for _ in range(len(new_data))]

for bin_size, bin_data, bin_regression in zip(new_bins, new_data, regressions):
    # Flatten each (bin_size, n_neurons) window into one feature vector.
    X = bin_data.reshape(bin_data.shape[0], -1)

    # Window i covers time bins i .. i + bin_size - 1, so the "current" sample
    # it predicts is the LAST bin of the window. Taking pupil_area[:MAX_POINTS]
    # would pair each window with its FIRST bin and let every multi-bin model
    # see up to bin_size - 1 future samples relative to its target.
    # Consequence: each window length is scored on targets shifted by
    # bin_size - 1 time bins, so the train/test boundary shifts with it.
    Y = data['pupil_area'][bin_size - 1:bin_size - 1 + MAX_POINTS]
    assert len(X) == len(Y), 'features and targets must have the same number of samples'

    train_X = X[:TRAIN_TEST_SPLIT]
    train_Y = Y[:TRAIN_TEST_SPLIT]
    test_X = X[TRAIN_TEST_SPLIT:]
    test_Y = Y[TRAIN_TEST_SPLIT:]

    bin_regression.fit(train_X, train_Y)

    print('bin_size:', bin_size)
    print('training score:', bin_regression.score(train_X, train_Y))
    print('testing score:', bin_regression.score(test_X, test_Y))
