Name: Holly Kular\
Date: 03-19-2024\
Email: hkular@ucsd.edu\
decode_L1.m\
Description: Script for decoding analysis on layer 1 of probabilistic RNN

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC  
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification
from scipy.optimize import curve_fit
from scipy.io import loadmat

In [5]:
# MODIFY HERE
# What conditions were the RNNs trained on?
RNN_params = {
    'prob_split': '70_30',
    'afc': 2,
    'coh': 'hi',
    'feedback': False,
    'thresh': [.3, .7],
}

# Decoding params -- the single source of truth for the decode options below.
D_params = {
    'time_avg': False,    # do we want to look at the average over a time window?
    't_win': [200, -1],   # time window used when time_avg is True
    'n_cvs': 5,           # number of cross-validation folds
    'num_cgs': 30,        # number of C penalties in the grid search
    'label': 'stim',      # 'stim' or 'choice'
    'units': 'all',       # 'all' or 'exc' or 'inh'
    'pred': 'expected',   # 'expected' or 'unexpected', trials had stim 0 or 1 dominant
}

# Timing of task
task_info = {
    'trials': 1000,
    'trial_dur': 250,  # trial duration (timesteps)
    'stim_on': 80,
    'stim_dur': 50,
}

# decode opts
# These are read from D_params rather than hard-coded a second time, so that
# editing the dict above actually changes the analysis.
time_avg = D_params['time_avg']
if time_avg:
    t_win = D_params['t_win']
n_cvs = D_params['n_cvs']
window = 25  # sliding-window length (timesteps) used by the decoding cell

# store the accuracy (one entry per cv fold, overwritten for every window)
acc = np.full((n_cvs,), np.nan)

# penalties to eval
num_cgs = D_params['num_cgs']
Cs = np.logspace(-5, 1, num_cgs)

# set up the grid
param_grid = {'C': Cs, 'kernel': ['linear']}

# define object - use a SVC that balances class weights (because they are biased, e.g. 70/30)
# note that can also specify cv folds here, but I'm doing it by hand below in a loop
grid = GridSearchCV(SVC(class_weight='balanced'), param_grid, refit=True, verbose=0)

# Data Directory
# The share is mounted at a different root on linux than on other platforms;
# everything below the root is identical.
if sys.platform.startswith('linux'):
    data_root = "/mnt/neurocube/local/serenceslab"
else:
    data_root = "/Volumes/serenceslab"
data_dir = f"{data_root}/holly/RNN_Geo/data/rdk_{RNN_params['prob_split']}_{RNN_params['afc']}afc/feedforward_only/{RNN_params['coh']}_coh"

# Choose the model
# Get all the trained models (should be 40 .mat files). os.listdir returns
# entries in arbitrary order, so sort to make "the first model" reproducible.
mat_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.mat'))
if not mat_files:
    raise FileNotFoundError(f"No trained-model .mat files found in {data_dir}")
model_path = os.path.join(data_dir, mat_files[0])
model = loadmat(model_path)  # model.keys()

# Load data
# Change this if we want a different number of trials and different default stim (0 is the one RNN was trained on)
pred_suffix = {'expected': '0expected', 'unexpected': '1unexpected'}
if D_params['pred'] not in pred_suffix:
    # Without this check an unknown value would leave data_file undefined.
    raise ValueError(
        f"D_params['pred'] must be 'expected' or 'unexpected', got {D_params['pred']!r}"
    )
model_id = model_path[-7:-4]  # the three characters just before the '.mat' extension
data_file = f"{data_dir}/Trials{task_info['trials']}_model{model_id}_{pred_suffix[D_params['pred']]}.npz"
data = np.load(data_file)


In [6]:
# Decode trials: RNN stim presented

# Pull the trial labels and the layer-1 activity out of the loaded archive.
# The activity is a [trial x time step x unit] matrix.
labs = data['labs'].squeeze()
data_d = data['fr1']

# Bookkeeping for the hand-rolled cross-validation
tris = data_d.shape[0]       # number of trials
tri_ind = np.arange(tris)    # trial indices 0 ... tris-1
hold_out = tris // n_cvs     # how many trials to hold out per fold

# filter by trial type
# TODO make sure expected only has expected and unexpected only has unexpected

In [7]:
def sliding_window(elements, window_size):
    """Return every contiguous window of ``window_size`` items from ``elements``.

    Parameters
    ----------
    elements : sequence
        Any object supporting ``len()`` and slicing (list, range, ...).
    window_size : int
        Number of items per window; must be >= 1.

    Returns
    -------
    list
        A list of slices of ``elements``, advancing one item at a time. If
        ``elements`` is no longer than ``window_size`` the list holds a single
        window covering the whole input, so the result is always a list of
        windows and never the flat input. Empty input gives an empty list.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    if len(elements) == 0:
        return []

    # At least one window, even when the input is shorter than the window;
    # slicing past the end simply yields everything that is available.
    n_windows = max(len(elements) - window_size + 1, 1)
    return [elements[i:i + window_size] for i in range(n_windows)]

In [None]:
# Do decoding
# Timesteps from stimulus offset to the end of the trial.
post_stim = range(task_info['stim_dur'] + task_info['stim_on'], task_info['trial_dur'])

# One slot per post-stimulus timestep so the array lines up with the time axis.
# Slots are NaN until filled: there are fewer windows than timesteps, and a
# zero would be indistinguishable from a real accuracy of 0.
decoding_acc = np.full((len(post_stim),), np.nan)

times = sliding_window(post_stim, window)
for win_idx, t in enumerate(times):

    # Get data slice for the current window avg (mean over the window's timesteps)
    data_slice = np.mean(data_d[:, t, :], axis=1)

    # loop over cvs and do classification
    for i in range(n_cvs):

        # trials to hold out as test set on this cv fold
        tst_ind = tri_ind[i * hold_out:(i + 1) * hold_out]

        # index into the training data on this cv fold
        trn_ind = np.setdiff1d(tri_ind, tst_ind)

        # get the training data (X) and the training labels (y)
        X = data_slice[trn_ind, :]
        y = labs[trn_ind]

        # fit the model
        grid.fit(X, y)

        # get the test data (X) and the test labels (y)
        X_test = data_slice[tst_ind, :]
        y_test = labs[tst_ind]

        # predict!
        acc[i] = grid.score(X_test, y_test)

    # Store at the window's own index (its first timestep). The index was
    # previously incremented before the store, which skipped slot 0 and
    # shifted every result by one timestep.
    decoding_acc[win_idx] = np.mean(acc)

print('done decoding')

In [None]:
# Plot decoding accuracy over time
# (statements start at column 0: an indented body under a top-level comment
# is an "unexpected indent" error)
fig, ax = plt.subplots()
ax.plot(range(task_info['stim_dur'] + task_info['stim_on'], task_info['trial_dur']), decoding_acc)
ax.set_xlabel('Time Step')
ax.set_ylabel('Decoding Accuracy')
ax.set_title('Decoding Accuracy Over Time')
# shade the stimulus presentation period
ax.axvspan(task_info['stim_on'], task_info['stim_on'] + task_info['stim_dur'], color='gray', alpha=0.3)
#fig.savefig(f"{data_dir}/decode_stim_exp.png")
plt.show()