In [176]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sklearn
import brainiak
import nilearn as nl
from nilearn import image, plotting, input_data

from scipy.spatial import distance

from utils import *

In [177]:
from sklearn.linear_model import LogisticRegression

classifier evidence over time for each subject / wedding
- train
- eval

In [178]:
def read_logfile(sub_num):
  """ 
  read psychopy logfile into dataframe

  Every non-blank line of the logfile is expected to hold three
  tab-separated fields: timestamp, log type, log message.

  Args:
    sub_num: subject number, inserted into the logfile path.

  Returns:
    (df, first_TR_tstamp)
      df: one row per log line, columns ['tstamp','logtype','logdata'],
        indexed by the line's timestamp. Timestamps may repeat; every
        line is kept.
      first_TR_tstamp: timestamp of the first 'Keypress: equal' line.

  Raises:
    ValueError: if a non-blank line does not have three tab-separated
      fields, or if the log contains no 'Keypress: equal' line.
  """
  fpath = 'behav/silvy_buckets/sub1%iday2/%i_viewing.log'%(sub_num,sub_num)
  records = []
  first_TR_tstamp = None
  # context manager guarantees the file handle is released
  with open(fpath, "r") as f:
    for line in f:
      if not line.strip():
        continue
      # maxsplit keeps tabs inside the message from breaking the unpacking
      tstamp,logtype,logdata = line.split('\t',2)
      tstamp = float(tstamp)
      # drop the line terminator only when present (the last line may lack one)
      if logdata.endswith('\n'):
        logdata = logdata[:-1]
      # remember the first scanner-trigger keypress
      if first_TR_tstamp is None and logdata == 'Keypress: equal':
        first_TR_tstamp = tstamp
      # logtype carries one trailing character before the tab, dropped here
      records.append((tstamp,logtype[:-1],logdata))
  if first_TR_tstamp is None:
    raise ValueError("no 'Keypress: equal' line found in %s"%fpath)
  # build the frame once; row-by-row .loc writes are slow and collapse
  # lines that share a timestamp
  df = pd.DataFrame(records,columns=['tstamp','logtype','logdata'])
  df.index = df['tstamp'].values
  return df,first_TR_tstamp

def reformat_index(df,first_TR_tstamp,TR_rate=1.5):
  """
  include TR column
  reset timestamps relative to first TR
  reindex by row number

  Args:
    df: dataframe whose index holds numeric timestamps.
    first_TR_tstamp: timestamp subtracted from every index value.
    TR_rate: divisor applied to the re-zeroed timestamps before flooring
      (presumably the TR duration in seconds -- confirm).

  Returns:
    A new dataframe (the input is left untouched) with an integer 'TR'
    column, rows with negative TR removed, and a 0..n-1 integer index.
  """
  # work on a copy so the caller's frame is not modified
  out = df.copy()
  # reindex relative to first TR
  out.index = out.index - first_TR_tstamp
  # include TR column
  rel_tstamps = np.asarray(out.index,dtype=float)
  out['TR'] = np.floor(rel_tstamps/TR_rate).astype(int)
  # remove negative TRs; copy so later column assignments do not
  # operate on a slice of another frame
  out = out[out.TR >= 0].copy()
  # reindex by row number
  out.index = np.arange(len(out))
  return out
  
def include_state_column(df,n_trials=10):
  """ 
  include column "state" which indicates which stimulus is being shown

  For each consecutive pair of clips in vid1a, vid1b, vid2, vid3, vid4, vid5,
  rows from the k-th '<clip>: autoDraw = True' line through the k-th
  '<next clip>: autoDraw = True' line (inclusive) are labelled with <clip>,
  for k in 0..n_trials-1. The onset row of the next clip is relabelled when
  that clip is processed in turn; vid5 itself is never used as a label.
  ** NB (original note) first and last wedding not included (10/12 weddings
  labeled) -- TODO confirm: the code labels the first n_trials onsets.

  Args:
    df: dataframe with a 'logdata' column; modified in place.
    n_trials: number of onsets per clip to label (default 10).

  Returns:
    The same dataframe with the added 'state' column (NaN where unlabelled).

  Raises:
    IndexError: if any clip has fewer than n_trials onset lines.
  """
  # object dtype from the start: writing strings into a float NaN column
  # is an incompatible-dtype assignment that newer pandas rejects
  df['state'] = np.full(len(df),np.nan,dtype=object)
  vid_strL = ['vid1a','vid1b','vid2','vid3','vid4','vid5']
  for vid_str0,vid_str1 in zip(vid_strL[:-1],vid_strL[1:]):
    vid_start_idx = df[df.logdata == '%s: autoDraw = True'%vid_str0].index
    vid_end_idx = df[df.logdata == '%s: autoDraw = True'%vid_str1].index
    for trial_num in range(n_trials):
      # .loc slicing is label-based and includes the end row
      df.loc[vid_start_idx[trial_num]:vid_end_idx[trial_num],'state']=vid_str0
  return df

def include_wed_and_nors_columns(df):
  """ 
  at the beginning of each trial, psychopy preloads the videos that will be shown in that trial
  from the video file names, I extract the wedding number and path
  from the path I infer north-or-south
  I then populate every row within that trial with these info
  ** NB last wedding not included (11/12 weddings labeled)

  A trial starts at a row whose logdata begins with "Created vid1a = " and
  runs up to (but not including) the next such row. The six rows starting
  at the trial start are parsed for the wedding number and the path.

  Args:
    df: dataframe with a string 'logdata' column; modified in place.

  Returns:
    The same dataframe with columns 'wed' (int, 99 where unlabelled) and
    'NorS' ('N', 'S', or '' where unlabelled).

  Raises:
    AssertionError: if a trial's path matches none of the four known paths.
  """
  north_paths = {
    ('1a','1b','2a','3a','4a','5'),
    ('1a','1b','2b','3b','4b','5'),
  }
  south_paths = {
    ('1a','1b','2a','3b','4a','5'),
    ('1a','1b','2b','3a','4b','5'),
  }
  ## initialize columns
  df['wed'] = 99
  df['NorS'] = ''
  ## find row positions corresponding to trial start (preloading videos)
  is_trial_start = df.logdata.str.startswith("Created vid1a = ",na=False)
  trial_start_pos = np.flatnonzero(is_trial_start.to_numpy(dtype=bool))
  wed_col = df.columns.get_loc('wed')
  nors_col = df.columns.get_loc('NorS')
  ## loop over trials; work positionally throughout so reads and writes
  ## address the same rows regardless of the index labels
  for start_pos,next_start_pos in zip(trial_start_pos[:-1],trial_start_pos[1:]):
    start_pos,next_start_pos = int(start_pos),int(next_start_pos)
    ## extract rows containing preloading, and use .str operations to clean
    preload_rows = df['logdata'].iloc[start_pos:start_pos+6]
    fnames = preload_rows.str.split(',').str[5].str.split('/').str[1]
    fname_parts = fnames.str.split('.')
    ## wedding number from the first file name, path segment from each file name
    wed_int = int(fname_parts.str[0].values[0].split('-')[1])
    path = tuple(fname_parts.str[1].values)
    ## decide NorS label based on path
    if path in north_paths:
      NorS_str = 'N'
    elif path in south_paths:
      NorS_str = 'S'
    else:
      # explicit raise: a bare assert carries no message and is stripped under -O
      raise AssertionError(
        'unrecognized path %r for trial starting at row %i'%(path,start_pos))
    ## insert into dataframe; the end is exclusive so the first row of the
    ## next trial is not labelled with this trial's wedding
    df.iloc[start_pos:next_start_pos,wed_col] = wed_int
    df.iloc[start_pos:next_start_pos,nors_col] = NorS_str
  return df

In [180]:
def load_logdf(sub_num):
  """ wrapper for loading subject experiment timing info

  Chains the four steps: read the psychopy logfile, re-zero the
  timestamps and add the TR column, add the 'state' column, then add the
  'wed' and 'NorS' columns. Returns the resulting dataframe.
  """
  # raw log rows plus the timestamp of the first TR
  raw_df,first_TR_tstamp = read_logfile(sub_num)
  # timestamps relative to first TR, TR column, row-number index
  timed_df = reformat_index(raw_df,first_TR_tstamp)
  # stimulus labels, then wedding / north-or-south labels
  return include_wed_and_nors_columns(include_state_column(timed_df))

In [181]:
# build the annotated log dataframe for subject 33 and display it
df = load_logdf(33)
df

Unnamed: 0,tstamp,logtype,logdata,TR,state,wed,NorS
0,26.7977,DATA,Keypress: equal,0,,99,
1,26.8295,EXP,Imported subj33_day2_viewing.csv as conditions...,0,,99,
2,26.8299,EXP,"Created sequence: sequential, trialTypes=12, n...",0,,99,
3,26.8306,EXP,"New trial (rep=0, index=0): OrderedDict([('nor...",0,,99,
4,26.8315,EXP,"Created sequence: random, trialTypes=1, nReps=...",0,,99,
...,...,...,...,...,...,...,...
1814,1771.23,DATA,Keypress: equal,1162,,99,
1815,1771.48,DATA,Keypress: 2,1163,,99,
1816,1772.73,DATA,Keypress: equal,1163,,99,
1817,1773.41,EXP,northorsouth2: autoDraw = False,1164,,99,


# load roi

In [191]:
def load_sub_roi(sub_num,task,roi_name):
  """ load one subject's saved ROI array

  The file name is built from the subject number, task and ROI name and
  is read from the 'fmri_data/masked/' directory with np.load.
  """
  fname = "sub-%i_task-%s_roi-%s.npy" %(sub_num,task,roi_name)
  roi_path = 'fmri_data/masked/'+fname
  return np.load(roi_path)

In [196]:
# pick one subject / task / ROI, then load its ROI array and log dataframe
sub_num = 33
task = 'videos'
roi_name = 'rsherlockAvg_fc_thr5_pmc'
sub_roi_act = load_sub_roi(sub_num,task,roi_name)
sub_logdf = load_logdf(sub_num)
# compare the two shapes (ROI array is presumably TRs x voxels -- confirm)
sub_roi_act.shape,sub_logdf.shape

((1167, 6170), (1819, 7))

In [7]:


def load_group_roi(roi_name,max_len=1160,task='videos'):
  """ load the ROI array of every subject in SUB_NS and stack them

  Args:
    roi_name: ROI name passed to load_sub_roi.
    max_len: each subject's array is truncated to its first max_len rows
      so that subjects with different row counts can be stacked.
    task: task name passed to load_sub_roi. Defaults to 'videos', the
      task used elsewhere in this notebook (assumed default -- confirm).

  Returns:
    np.array built from the list of truncated per-subject arrays.
  """
  L = []
  for sub_n in SUB_NS:
    # load_sub_roi takes (sub_num, task, roi_name); task must be passed
    sub_roi = load_sub_roi(sub_n,task,roi_name)
    print(sub_roi.shape)
    L.append(sub_roi[:max_len,:])
  return np.array(L)

In [12]:
# load the first ROI in ROI_NAME_L for all subjects
# NOTE(review): ROI_NAME_L is not defined in the visible cells -- presumably
# it comes from `from utils import *`; confirm
group_roi = load_group_roi(ROI_NAME_L[0])

(1168, 868)
(1166, 868)
(1168, 868)
(1167, 868)
(1166, 868)
(1168, 868)
(1166, 868)
(1167, 868)
(1166, 868)
