Name: Holly Kular\
Date: 03-19-2024\
Email: hkular@ucsd.edu\
decode_L1.m\
Description: Script for decoding analysis on layer 1 of probabilistic RNN\

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import sys

In [4]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC  
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification  

In [5]:
# MODIFY HERE
# what conditions were the RNNs trained on?
prob_split = '70_30' # the probability of stimulus 1 vs all
afc = '6' # number of alternatives
coh = 'lo' # coherence
feedback = False # interlayer feedback (true or false)

In [11]:
# Data Directory
if sys.platform.startswith('linux'):
    data_dir = f"/mnt/neurocube/local/serenceslab/holly/RNN_Geo/data/rdk_{prob_split}_{afc}afc/feedforward_only/{coh}_coh"
else:
    data_dir = f"/Volumes/serenceslab/holly/RNN_Geo/data/rdk_{prob_split}_{afc}afc/feedforward_only/{coh}_coh"

# Load data
data = np.load(f"{data_dir}/Trials.npz")

In [12]:
# data['fr1'] data['outs'] data['labs']

KeysView(<numpy.lib.npyio.NpzFile object at 0x7fb1f0d2a370>)

## Compare decode choice vs. stim
Hypothesis: We can decode stimulus better than choice from layer 1 because of a more sensory-like format

### Decode choice

In [25]:
# Decode trials: RNN stim choice

# averge over this time window post stimulus
# this is unit of model time-steps
t_win = [ 200,-1 ]

# number of cv folds
n_cvs = 5 

# store the accuracy
acc = np.full( ( n_cvs ), np.nan )

# penalties to eval
num_cgs = 30
Cs = np.logspace( -5,1,num_cgs )

# set up the grid
param_grid = { 'C': Cs, 'kernel': ['linear'] }

# define object - use a SVC that balances class weights (because they are biased, e.g. 70/30)
# note that can also specify cv folds here, but I'm doing it by hand below in a loop
grid = GridSearchCV( SVC(class_weight = 'balanced'),param_grid,refit=True,verbose=0 )

# get the data from layer 1, decode choice
# this is a [trial x time step x unit] matrix
data_d = data['outs']
labs = data['outs']

In [28]:
data['outs'].shape

(100, 250)

In [26]:
# avg over time window
data_d = np.mean( data_d[ :,t_win[0]:t_win[1], : ], axis = 1 )

# get some info about structure of the data
tris = data_d.shape[0]             # number of trials
tri_ind = np.arange(0,tris)      # list from 0...tris
hold_out = int( tris / n_cvs )   # how many trials to hold out

# loop over cvs and do classification
for i in range(n_cvs):

    # trials to hold out as test set on this cv fold
    tst_ind = tri_ind[ i*hold_out : (i+1)*hold_out ]
    
    # index into the training data on this cv fold
    trn_ind = np.setdiff1d( tri_ind, tst_ind )

    # get the training data (X) and the training labels (y)
    # note that y is unbalanced unless prob is 50/50
    # todo: verify that SVC(class_weight = 'balanced')
    # is working as desired!
    # HK done: follows formula of n/ki
    X = data_d[trn_ind,:]
    y = labs[trn_ind]

    # fit the model
    grid.fit( X,y )

    # progress report
    print(f'CV: {i}, {grid.best_estimator_}')

    # get the test data (X) and the test labels (y)
    X_test = data_d[tst_ind,:]
    y_test = labs[tst_ind]

    # predict!
    acc[ i ] = grid.score( X_test,y_test )
        

print( np.mean( acc ) )
print(f'done decoding')

Traceback (most recent call last):
  File "/Users/hkular/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/hkular/opt/anaconda3/lib/python3.8/site-packages/sklearn/svm/_base.py", line 169, in fit
    X, y = self._validate_data(X, y, dtype=np.float64,
  File "/Users/hkular/opt/anaconda3/lib/python3.8/site-packages/sklearn/base.py", line 433, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/Users/hkular/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/Users/hkular/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 814, in check_X_y
    X = check_array(X, accept_sparse=accept_sparse,
  File "/Users/hkular/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/User

ValueError: Expected 2D array, got 1D array instead:
array=[ 1.25987538e-02  9.61640119e-03 -3.37013302e-03  1.42980061e-01
  9.18939156e-01  1.41882458e-02 -2.94296224e-04 -5.19451972e-02
  4.28692721e-02 -1.55551970e-02 -1.52397149e-02  2.19728890e-01
 -8.26654143e-03 -5.49193643e-02 -4.78498774e-02  3.33250400e-02
  2.80963312e-01 -2.85152414e-02  9.00256461e-01  4.12436454e-01
 -4.16945736e-02  3.93389952e-02 -2.32306057e-02  3.39701875e-01
 -1.98566083e-02  3.32582143e-02  1.11649219e-01  1.04274101e-01
 -2.35039109e-02 -1.12139669e-02 -2.97177718e-02 -7.31610455e-03
 -7.58473035e-03 -6.60579334e-02  2.66338976e-02 -4.25039365e-02
  7.22766342e-03  8.55086478e-01  7.53848734e-01 -4.28152653e-03
 -9.52600625e-03 -4.43927773e-02  8.52884734e-01 -1.69442939e-02
  2.20444195e-02  3.12134520e-01 -2.18735845e-03  5.69502576e-01
  3.91562736e-02 -4.39005345e-02  8.67619225e-01 -6.52574960e-03
  7.19966503e-02  5.62059524e-01  9.61155256e-02  3.67279263e-03
 -2.33428293e-02  6.12255625e-02 -3.26624690e-02 -1.00711834e-02
 -4.36956243e-02  4.95313584e-02 -3.42453442e-02  1.77015064e-02
 -5.16179561e-02  8.71852960e-01  8.63323734e-01 -5.46428688e-02
 -4.64975382e-02 -1.94723577e-02 -5.61414742e-03  8.86942303e-01
 -5.02947585e-02 -9.36670061e-03  8.86110107e-02  7.93078414e-01
 -3.94994595e-02 -5.65663971e-03 -3.20067809e-02 -7.25857334e-02].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

### Decode stim

______________________________________________________________________

## Next: Compare decode expected vs. unexpected
Hypothesis: We can decode expected stimulus better than unexpected because the RNN has acquired the expectation. 