# Decoding state information

- Extract information from the files in the `recall_transcriptions` folder.
    - See the `behav` notebook.

In [1]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt

import sklearn
import brainiak
import nilearn as nl
from nilearn import image, plotting, input_data

from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler

In [2]:
from sklearn.linear_model import LogisticRegression
pd.options.display.max_rows = 200

# load dataframes with timing and order information

In [3]:
timing_df = pd.read_csv('deriv/state_timing_df_viewing.csv',index_col=0)
timing_df.iloc[:200]

Unnamed: 0,sub_num,vid_str,wed_num,onset_TR,state,len_TRs,offset_TR
0,3,vid1a,1,111,1b,17,128
1,3,vid1a,2,207,1b,17,224
2,3,vid1a,3,303,1b,17,320
3,3,vid1a,4,399,1b,17,416
4,3,vid1a,5,495,1b,17,512
5,3,vid1a,6,591,1b,17,608
6,3,vid1a,7,687,1b,17,704
7,3,vid1a,8,784,1b,17,801
8,3,vid1a,9,880,1b,17,897
9,3,vid1a,10,976,1b,17,993


# get state classifiers

In [4]:
def get_training_info(sub_num,depth,timing=None):
  """
  information required for training state classifier on given layer

  Parameters
  ----------
  sub_num : subject number, compared against the `sub_num` column
  depth : layer/depth; `str(depth)` is compared against the first
    character of the `state` column
  timing : optional dataframe with columns `sub_num`, `state`,
    `onset_TR` and `offset_TR`. When omitted, the notebook-level
    `timing_df` is used, so two-argument calls behave as before.

  Returns
  -------
  TR_L : list of TR indices, onset (inclusive) to offset (exclusive),
    concatenated over the matching rows in dataframe order
  ytarget_L : list with one label per entry of TR_L; 1 when the row's
    state string ends in 'a', otherwise 0

  Raises
  ------
  AssertionError when the two labels do not have the same number of TRs
  """
  if timing is None:
    # fall back on the dataframe loaded at notebook level
    timing = timing_df
  # relevant subset of dataframe
  is_sub = timing.loc[:,'sub_num'] == sub_num
  is_depth = timing.loc[:,'state'].str[0] == str(depth)
  sub_depth_timing_df = timing[is_sub & is_depth]
  # collect TRs for X_train and state_value for Y_train
  TR_L = []
  ytarget_L = []
  # iter over rows (wedding); column-wise zip avoids building a Series per row
  for onset_TR,offset_TR,state in zip(
      sub_depth_timing_df.loc[:,'onset_TR'],
      sub_depth_timing_df.loc[:,'offset_TR'],
      sub_depth_timing_df.loc[:,'state']):
    # TRs for given wedding
    wed_TRs = np.arange(onset_TR,offset_TR)
    TR_L.extend(wed_TRs)
    # target label for each TR: last character of state picks the class
    state_bool = state[-1] == 'a'
    ytarget_L.extend(np.repeat(int(state_bool),len(wed_TRs)))
  # ensure same number of samples for both conditions
  num_pos = sum(ytarget_L)
  assert num_pos == len(ytarget_L)/2, (
    "unbalanced labels for sub %s depth %s: %s of %s TRs are class 1"
    %(sub_num,depth,num_pos,len(ytarget_L)))
  return TR_L,ytarget_L

In [5]:
def load_sub_roi(sub_num,roi_name,task):
  """
  load the .npy array saved under data/fmri/masked/ for one
  subject / ROI / task combination
  task: [videos,recall2]
  """
  fname = "sub-%i_task-%s_roi-%s.npy" %(sub_num,task,roi_name)
  masked_path = 'data/fmri/masked/'+fname
  return np.load(masked_path)

In [15]:
# Defined in this cell so it runs top-to-bottom on a fresh kernel; the
# values are the same ones the single-subject training cell uses.
roi_name = 'rglasser_PM_net'
sub_num = 33

# recall-phase data for the subject; the shape is shown as the cell output
sub_roi_recall = load_sub_roi(sub_num,roi_name,'recall2')
sub_roi_recall.shape

(524, 5037)

In [11]:
# single subject / ROI / depth used for this classifier
roi_name = 'rglasser_PM_net'
sub_num = 33
depth = 2 # [2,3,4]

# viewing TRs and binary state labels for this subject and depth
TR_L,ytarget_train = get_training_info(sub_num,depth)
sub_roi_view = load_sub_roi(sub_num,roi_name,'videos')

# standardize every feature over the selected training TRs; the fitted
# scaler is kept so the same transform can be applied to other data
scaler = StandardScaler()
xact_train = scaler.fit_transform(sub_roi_view[TR_L,:])

# fit the state classifier; the fitted estimator is the cell output
clf = LogisticRegression(solver='liblinear',C=1.00)
clf.fit(xact_train,ytarget_train)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
# Deliberate stop: always raises AssertionError so that "Run All" halts here.
# NOTE(review): presumably guards the cells below, which use `wed_df`
# (not defined anywhere above in this notebook) — confirm before removing.
assert False

# behavioral recall dataframe

# Parent notebook (NB)

In [None]:
def get_data(sub_num,roi_name,task):
  """
  task: [videos,recall2]
  returns the func data for given sub/roi/task, one entry per row of
  `wed_df` belonging to the subject
  NB assumes wed_df is available as global variable with columns
  sub_num, wed_id, wed_class, onset_<task>, offset_<task>

  Returns
  -------
  (Xact_L, ytarget_L, stimstr_L) where, per wedding row,
    Xact_L    : slice of the ROI array from onset_<task> to offset_<task>
    ytarget_L : array with one label per row of the slice; 1 when
                wed_class == 'N', otherwise 0
    stimstr_L : "wed_<wed_id>-class_<wed_class>" identifier
  None when loading the ROI array or selecting the subject's rows fails
  (callers must check for this before unpacking).
  """
  try:
    sub_roi_act = load_sub_roi(sub_num,roi_name,task)
    sub_wed_df = wed_df[wed_df.sub_num==sub_num]
  except Exception as err:
    # `Exception` rather than a bare except, so KeyboardInterrupt and
    # SystemExit still propagate; the cause is reported instead of hidden
    print('err loading sub',sub_num,repr(err))
    return None
  onset_col = 'onset_%s'%task
  offset_col = 'offset_%s'%task
  Xact_L,ytarget_L = [],[]
  stimstr_L = []
  for idx,df_row in sub_wed_df.iterrows():
    # rows (first axis) of the ROI array belonging to this wedding
    Xact_wed = sub_roi_act[df_row.loc[onset_col]:df_row.loc[offset_col]]
    # same label repeated for every row of the slice
    ytarget_wed = np.repeat(int(df_row.wed_class == 'N'),len(Xact_wed))
    Xact_L.append(Xact_wed)
    ytarget_L.append(ytarget_wed)
    # string identifying test sequences
    stimstr = "wed_%i-class_%s"%(df_row.wed_id,df_row.wed_class)
    stimstr_L.append(stimstr)
  return Xact_L,ytarget_L,stimstr_L

In [None]:
roi_name= 'rglasser_PM_net'
clf_c = 1.00

for sub_num in range(30,39):
  print('sub',sub_num)
  ## load data: train on viewing, test on recall
  # get_data returns None when a subject's data cannot be loaded; check
  # for that explicitly instead of relying on a bare except around the
  # tuple unpacking (which also hid unrelated errors)
  train_data = get_data(sub_num,roi_name,'videos')
  if train_data is None:
    continue
  test_data = get_data(sub_num,roi_name,'recall2')
  if test_data is None:
    continue
  Xact_train_L,ytarget_train_L,_ = train_data
  Xact_test_L,ytarget_test_L,stimstr_L = test_data
  if len(Xact_train_L) == 0:
    # nothing to concatenate / fit for this subject
    print('no training data for sub',sub_num)
    continue
  ytarget_train = np.concatenate(ytarget_train_L)
  Xact_train = np.concatenate(Xact_train_L)
  ## normalize: scaler is fit on train data only and reused on test data
  scaler = StandardScaler()
  Xact_train = scaler.fit_transform(Xact_train)
  Xact_test_L = [scaler.transform(Xact) for Xact in Xact_test_L]
  ## fit classifier
  clf = LogisticRegression(solver='liblinear',C=clf_c)
  clf.fit(Xact_train,ytarget_train)
  ## EVAL LOOP: one iteration per test wedding returned by get_data
  for stimstr,Xact_test_wed,ytarget_wed in zip(stimstr_L,Xact_test_L,ytarget_test_L):
    # label of this wedding, kept as an array so the column selection
    # below stays 2-D
    ytarget_test_wed = np.unique(ytarget_wed)
    # predicted probability of the wedding's own label at every TR
    # NOTE(review): indexing columns by label assumes clf.classes_ is [0,1] - confirm
    yhat_wed = clf.predict_proba(Xact_test_wed)[:,ytarget_test_wed]
    np.save('deriv/analyses/NvS_train_view_test_recall/sub_%i-%s'%(sub_num,stimstr),yhat_wed)

