In [1]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt

import sklearn
import brainiak
import nilearn as nl
from nilearn import image, plotting, input_data

from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
pd.options.display.max_rows = 200

# load dataframes with timing and order information

In [2]:
# viewing-phase timing table (columns include sub_num, vid_str, wed_num, onset_TR, offset_TR)
view_fpath = 'deriv/view_df.csv'
view_df = pd.read_csv(view_fpath, index_col=0)
view_df

Unnamed: 0,sub_num,vid_str,wed_num,onset_TR,state,len_TRs,offset_TR
60,25,vid1a_q,0,12,1b,17,29
60,29,vid1a_q,0,12,1b,17,29
60,6,vid1a_q,0,12,1b,17,29
60,10,vid1a_q,0,12,1b,17,29
60,36,vid1a_q,0,12,1b,17,29
...,...,...,...,...,...,...,...
71,42,vid5_q,11,1146,5,7,1153
71,14,vid5_q,11,1146,5,7,1153
71,12,vid5_q,11,1147,5,7,1154
71,13,vid5_q,11,1147,5,7,1154


In [3]:
wed_fpath = 'deriv/wed_df.csv'
wed_df = pd.read_csv(wed_fpath, index_col=0)
# NOTE(review): read_csv treats the literal string 'NA' as missing by default,
# so the 'NA' path label presumably comes back as NaN -- restore it here
wed_df['path'] = wed_df['path'].fillna('NA')
# wed_df.iloc[:200]

In [5]:
# recall-phase table; its index is later used to pick rows of the recall fMRI data
recall_fpath = 'deriv/recall_df.csv'
recall_df = pd.read_csv(recall_fpath, index_col=0)
# recall_df

# form training and testing datasets

### classifier training 

In [5]:
""" 
build training dataset
"""

# maps (path, layer) -> state label ('a' or 'b')
# each path is paired with its labels at layers 2, 3, 4 (in that order)
pathlayer2label_D = {
  (path,layer):label
  for path,labels in [('NA','aaa'),('NB','bbb'),('SA','aba'),('SB','bab')]
  for layer,label in zip((2,3,4),labels)
}

def get_training_info(sub_num,layer_num):
  """
  for given subject/layer
  returns info needed to train state classifier
    the TRs when viewing layer (for all 12 weddings)
    and the labels of the states for that layer

  for each wedding, the subject's path is read from `wed_df`, the
  subject's own row of `view_df` for the layer video gives the
  [onset_TR, offset_TR) window, and every TR in that window is labelled
  using `pathlayer2label_D[(path, layer_num)]`

  args
    sub_num: subject number, matched against `sub_num` in both
      `wed_df` and `view_df`
    layer_num: layer to decode; (path, layer_num) must be a key of
      `pathlayer2label_D`

  returns
    TRs: np.array of TR indices, concatenated over the 12 weddings
    labels: np.array of bool, True where the state label is 'a'

  raises
    IndexError if the subject has no matching row for some wedding
  """
  ytarget_L = []
  TR_L = []
  # wed_df contains labels for given wedding
  # select subject specific rows of wed_df
  sub_wed_df = wed_df[wed_df.sub_num==sub_num]
  # onset/offset TRs differ between subjects, so the timing lookup
  # must be restricted to this subject's rows of view_df
  sub_bool = (view_df.sub_num == sub_num)
  layer_bool = (view_df.vid_str.str[:len('vid1')] == 'vid%i'%layer_num)
  for wed_num in range(12):
    path = sub_wed_df[sub_wed_df.wed_view_num==wed_num].path.values[0]
    wed_bool = (view_df.wed_num == wed_num)
    # TRs for given state (for given sub/layer)
    timing_df = view_df[sub_bool&wed_bool&layer_bool]
    onsetTR,offsetTR = timing_df.loc[:,['onset_TR','offset_TR']].values[0]
    state_TRs = np.arange(onsetTR,offsetTR)
    # state label
    state_label = pathlayer2label_D[(path,layer_num)]
    # one boolean label per TR in the window
    TR_L.extend(state_TRs)
    ytarget_L.extend(np.repeat(state_label=='a',len(state_TRs)))
  return np.array(TR_L),np.array(ytarget_L)

In [6]:
""" 
build testing dataset
""" 

## dict mapping (layer,state):recall_transcript_code
## codes 3..8 are assigned in order over layers 2,3,4 and states a,b
recall_label_D = dict(zip(
  [(layer,state) for layer in (2,3,4) for state in ('a','b')],
  range(3,9),
))

def get_test_info(sub_num,layer_num):
  """
  build testing dataset
  find TRs during recall when sub is recalling given state+layer
  along with labels for these recall TRs when recalling layer

  args
    sub_num: subject number, matched against `recall_df.sub_num`
    layer_num: layer to decode; (layer_num, 'a') and (layer_num, 'b')
      must be keys of `recall_label_D`

  returns
    XTRs: list of index values of the subject's `recall_df` rows whose
      `recall` code matches the layer+state (state 'a' rows first, then 'b')
      NOTE(review): callers use these as row indices into the recall
      fMRI array, which assumes the recall_df index is the TR -- confirm
    labels: np.array of bool, same length as XTRs, True for state 'a';
      an empty bool array when the subject never recalls this layer
  """
  ytarget = []
  XTRs = []
  sub_recall_df = recall_df[recall_df.sub_num==sub_num]
  for state_id in ['a','b']:
    # from layer+state get transcript_code
    recall_code = recall_label_D[(layer_num,state_id)]
    # find TRs where sub talks about layer+state
    TRs_state = sub_recall_df[sub_recall_df.recall==recall_code].index.values
    XTRs.extend(TRs_state)
    # build boolean labels directly (as in get_training_info) so an empty
    # result never compares an empty float array against a string
    ytarget.extend(np.repeat(state_id=='a',len(TRs_state)))
  return XTRs,np.array(ytarget,dtype=bool)

# train-test loop

In [7]:
def load_sub_roi(sub_num,roi_name,task):
  """
  load the saved .npy array for one subject / task / roi
  file is looked up under data/fmri/masked/ (relative to the working dir)
  """
  roi_fpath = 'data/fmri/masked/' + "sub-%i_task-%s_roi-%s.npy" %(sub_num,task,roi_name)
  sub_roi = np.load(roi_fpath)
  return sub_roi

In [11]:
""" 
train and test classifier
"""

# roi whose masked data is decoded, and inverse regularization strength of the classifier
roi_name= 'rglasser_PM_net'
clf_c = 1.00

# one record (dict) per subject/layer that could be scored
L = []
for sub_num,layer_num in itertools.product(np.arange(45),range(2,5)):
  print('s',sub_num,'l',layer_num)
  # load fmri data; skip subjects whose files are missing, unreadable or empty
  # (only loading problems are caught, so genuine bugs and KeyboardInterrupt
  #  are no longer silently swallowed)
  try:
    sub_roi_view = load_sub_roi(sub_num,roi_name,'videos')
    sub_roi_recall = load_sub_roi(sub_num,roi_name,'recall2')
    if not (len(sub_roi_view) and len(sub_roi_recall)):
      raise ValueError('empty fmri array')
  except (OSError,ValueError):
    print('err loading sub',sub_num)
    continue
  print('X')
  ## build train/test datasets
  # train: viewing TRs of this layer, labelled by state
  train_TRs,Ytrain = get_training_info(sub_num,layer_num)
  Xtrain = sub_roi_view[train_TRs,:]
  # test: recall TRs where sub talks about this layer, labelled by state
  test_TRs,Ytest = get_test_info(sub_num,layer_num)
  Xtest = sub_roi_recall[test_TRs,:]
  # check if recall data exists
  if not len(Xtest):
    print('no recall data. sub',sub_num,'layer',layer_num)
    continue
  print('XX')
  ## normalize; scaler is fit on training data only and reused on test data
  scaler = StandardScaler()
  Xtrain = scaler.fit_transform(Xtrain)
  Xtest = scaler.transform(Xtest)
  ## fit classifier
  clf = LogisticRegression(solver='liblinear',C=clf_c)
  clf.fit(Xtrain,Ytrain)
  # eval classifier: mean accuracy on the recall TRs
  score = clf.score(Xtest,Ytest)
  # record
  D = {}
  D['num_test_samples']=len(Ytest)
  D['sub_num']=sub_num
  D['layer']=layer_num
  D['score']=score
  L.append(D)

## one row per scored subject/layer
results = pd.DataFrame(L)

s 0 l 2
err loading sub 0
s 0 l 3
err loading sub 0
s 0 l 4
err loading sub 0
s 1 l 2
err loading sub 1
s 1 l 3
err loading sub 1
s 1 l 4
err loading sub 1
s 2 l 2
err loading sub 2
s 2 l 3
err loading sub 2
s 2 l 4
err loading sub 2
s 3 l 2
err loading sub 3
s 3 l 3
err loading sub 3
s 3 l 4
err loading sub 3
s 4 l 2
err loading sub 4
s 4 l 3
err loading sub 4
s 4 l 4
err loading sub 4
s 5 l 2
err loading sub 5
s 5 l 3
err loading sub 5
s 5 l 4
err loading sub 5
s 6 l 2
err loading sub 6
s 6 l 3
err loading sub 6
s 6 l 4
err loading sub 6
s 7 l 2
err loading sub 7
s 7 l 3
err loading sub 7
s 7 l 4
err loading sub 7
s 8 l 2
err loading sub 8
s 8 l 3
err loading sub 8
s 8 l 4
err loading sub 8
s 9 l 2
err loading sub 9
s 9 l 3
err loading sub 9
s 9 l 4
err loading sub 9
s 10 l 2
err loading sub 10
s 10 l 3
err loading sub 10
s 10 l 4
err loading sub 10
s 11 l 2
err loading sub 11
s 11 l 3
err loading sub 11
s 11 l 4
err loading sub 11
s 12 l 2
err loading sub 12
s 12 l 3
err loading sub



s 32 l 2
X
XX
s 32 l 3
X
XX
s 32 l 4
X
XX
s 33 l 2
X
XX
s 33 l 3
X
XX
s 33 l 4
X
XX
s 34 l 2
X
XX
s 34 l 3
X
XX
s 34 l 4
X
XX
s 35 l 2
X
XX
s 35 l 3
X
no recall data. sub 35 layer 3
s 35 l 4
X
XX




s 36 l 2
X
XX
s 36 l 3
X
XX
s 36 l 4
X
XX
s 37 l 2
err loading sub 37
s 37 l 3
err loading sub 37
s 37 l 4
err loading sub 37
s 38 l 2
X
XX
s 38 l 3
X
no recall data. sub 38 layer 3
s 38 l 4
X
XX




s 39 l 2
err loading sub 39
s 39 l 3
err loading sub 39
s 39 l 4
err loading sub 39
s 40 l 2
err loading sub 40
s 40 l 3
err loading sub 40
s 40 l 4
err loading sub 40
s 41 l 2
err loading sub 41
s 41 l 3
err loading sub 41
s 41 l 4
err loading sub 41
s 42 l 2
err loading sub 42
s 42 l 3
err loading sub 42
s 42 l 4
err loading sub 42
s 43 l 2
err loading sub 43
s 43 l 3
err loading sub 43
s 43 l 4
err loading sub 43
s 44 l 2
err loading sub 44
s 44 l 3
err loading sub 44
s 44 l 4
err loading sub 44


In [12]:
# number of distinct subjects with at least one scored layer; used to tag the output file
Nsubs = results['sub_num'].nunique(dropna=False)
results_fpath = 'data/analyses/decodeState_trainView_testRecall-N%i.csv'%Nsubs
results.to_csv(results_fpath)
