Name: Holly Kular\
Date: 03-19-2024\
Email: hkular@ucsd.edu\
decode_L1_stimvschoice\
Description: Script for decoding analysis on layer 1 of probabilistic RNN

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import sys

In [3]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC  
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification  

In [79]:
# MODIFY HERE
# what conditions were the RNNs trained on?
prob_split = '70_30' # the probability of stimulus 1 vs all
afc = '6' # number of alternatives
coh = 'lo' # coherence
feedback = False # interlayer feedback (true or false)

In [80]:
# Data Directory
if sys.platform.startswith('linux'):
    data_dir = f"/mnt/neurocube/local/serenceslab/holly/RNN_Geo/data/rdk_{prob_split}_{afc}afc/feedforward_only/{coh}_coh"
else:
    data_dir = f"/Volumes/serenceslab/holly/RNN_Geo/data/rdk_{prob_split}_{afc}afc/feedforward_only/{coh}_coh"

# Load data
#data = np.load(f"{data_dir}/Trials.npz")
data = np.load(f"{data_dir}/Trials_0expected.npz")

In [15]:
# data['fr1'] data['outs'] data['labs']

## Compare decode choice vs. stim
Hypothesis: We can decode stimulus better than choice from layer 1 because of a more sensory-like format

In [81]:
def evaluate_threshold(X, labs, thresh, model):
  # Convert labels based on threshold
  labs_binary = np.where(labs >= thresh, 1, 0)
  
  # Train-test split
  X_train, X_test, y_train, y_test = train_test_split(X, labs_binary, test_size=0.2)
  
  # Fit the model
  model.fit(X_train, y_train)
  
  # Evaluate on test set
  predictions = model.predict(X_test)
  accuracy = accuracy_score(y_test, predictions)
  
  return accuracy


### Decode choice

In [82]:
# Decode trials: RNN stim choice

# averge over this time window post stimulus
# this is unit of model time-steps
t_win = [ 200,-1 ]

# number of cv folds
n_cvs = 5

# store the accuracy
acc = np.full( ( n_cvs ), np.nan )

# penalties to eval
num_cgs = 30
Cs = np.logspace( -5,1,num_cgs )

# set up the grid
param_grid = { 'C': Cs, 'kernel': ['linear'] }

# define object - use a SVC that balances class weights (because they are biased, e.g. 70/30)
# note that can also specify cv folds here, but I'm doing it by hand below in a loop
grid = GridSearchCV( SVC(class_weight = 'balanced'),param_grid,refit=True,verbose=0 )

# get the data from layer 1 decode choice
# this is a [trial x time step x unit] matrix
data_d = data['fr1']
labs = data['outs'][:,-1]

In [83]:
# Define threshold range (adjust as needed)
thresholds = np.arange(0.1, 0.9, 0.05)

# avg over time window
data_d = np.mean( data_d[ :,t_win[0]:t_win[1], : ], axis = 1 )

# get some info about structure of the data
tris = data_d.shape[0]             # number of trials
tri_ind = np.arange(0,tris)      # list from 0...tris
hold_out = int( tris / n_cvs )   # how many trials to hold out


# Initialize list to store accuracies
best_thresh = None
best_accuracy = 0

In [84]:


for thresh in thresholds:
  # Within each cross-validation fold
  for i in range(n_cvs):
        
    # trials to hold out as test set on this cv fold
    tst_ind = tri_ind[ i*hold_out : (i+1)*hold_out ]
    
    # index into the training data on this cv fold
    trn_ind = np.setdiff1d( tri_ind, tst_ind )

    # get the training data (X) and the training labels (y)
    # note that y is unbalanced unless prob is 50/50
    X = data_d[ trn_ind,: ]
    y = labs[ trn_ind ]

    # Convert labels based on current threshold
    y_binary = np.where( y >= thresh, 1, 0 )

    # Fit the model on the binary labels
    grid.fit( X, y_binary )
    
    # get the test data (X) and the test labels (y)
    X_test = data_d[tst_ind, :]
    y_test = labs[tst_ind]
    y_test_binary = np.where( y_test >= thresh, 1, 0 )

    # predict!
    acc[ i ] = grid.score( X_test,y_test_binary )

    # Evaluate accuracy
    accuracy = np.mean( acc )

    # Update optimal settings if needed
    if accuracy > best_accuracy:
      best_accuracy = accuracy
      best_thresh = thresh

  # Progress report after each threshold iteration
  #print(f'Threshold: {thresh}, Accuracy: {accuracy}')

# Print overall results
print(f"Optimal threshold: {best_thresh}, Best Accuracy: {best_accuracy}")

Optimal threshold: 0.30000000000000004, Best Accuracy: 0.7699999999999999


### Decode stim

In [89]:
# Decode trials: RNN stim presented

# averge over this time window post stimulus
# this is unit of model time-steps
t_win = [ 200,-1 ]

# number of cv folds
n_cvs = 5

# store the accuracy
acc = np.full( ( n_cvs ), np.nan )

# penalties to eval
num_cgs = 30
Cs = np.logspace( -5,1,num_cgs )

# set up the grid
param_grid = { 'C': Cs, 'kernel': ['linear'] }

# define object - use a SVC that balances class weights (because they are biased, e.g. 70/30)
# note that can also specify cv folds here, but I'm doing it by hand below in a loop
grid = GridSearchCV( SVC(class_weight = 'balanced'),param_grid,refit=True,verbose=0 )

# get the data from layer 1 decode stim
# this is a [trial x time step x unit] matrix
data_d = data['fr1']
labs = data['labs'].squeeze()
#labs = np.where(labs == 0, 0, 1)

In [90]:
# avg over time window
data_d = np.mean( data_d[ :,t_win[0]:t_win[1],: ], axis = 1 )

# get some info about structure of the data
tris = data_d.shape[0]             # number of trials
tri_ind = np.arange(0,tris)      # list from 0...tris
hold_out = int( tris / n_cvs )   # how many trials to hold out

# loop over cvs and do classification
for i in range(n_cvs):

    # trials to hold out as test set on this cv fold
    tst_ind = tri_ind[ i*hold_out : (i+1)*hold_out ]
    
    # index into the training data on this cv fold
    trn_ind = np.setdiff1d( tri_ind, tst_ind )

    # get the training data (X) and the training labels (y)
    X = data_d[trn_ind,:]
    y = labs[trn_ind]

    # fit the model
    grid.fit( X,y )

    # progress report
    print(f'CV: {i}, {grid.best_estimator_}')

    # get the test data (X) and the test labels (y)
    X_test = data_d[tst_ind, :]
    y_test = labs[tst_ind]

    # predict!
    acc[ i ] = grid.score( X_test,y_test )
        

print( np.mean( acc ) )

CV: 0, SVC(C=6.2101694189156165, class_weight='balanced', kernel='linear')
CV: 1, SVC(C=3.856620421163472, class_weight='balanced', kernel='linear')
CV: 2, SVC(C=1.610262027560939e-05, class_weight='balanced', kernel='linear')
CV: 3, SVC(C=4.1753189365604006e-05, class_weight='balanced', kernel='linear')
CV: 4, SVC(C=10.0, class_weight='balanced', kernel='linear')
0.7399999999999999


______________________________________________________________________