## deprecated 
- see `generate_view_df`

* creates `timing_df_viewing.csv`
- make and save a dataframe containing the onset TR for each event in the experiment
- average across subjects to get a single common TR that will be used to index functional data

In [3]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt

import sklearn
import brainiak
import nilearn as nl
from nilearn import image, plotting, input_data

from scipy.spatial import distance

pd.options.display.max_rows = 200

In [10]:
"""
load log dataframe
"""

def read_logfile(sub_num,task='viewing'):
  """
  Read a PsychoPy logfile into a dataframe.

  Each non-blank line of the file is expected to hold three tab-separated
  fields: timestamp, log level, message. The log-level field is discarded.

  Parameters
  ----------
  sub_num : int
    subject number; the file is read from
    'data/behav/silvy_buckets/sub<100+sub_num>day2/<sub_num>_<task>.log'
  task : str
    task name used in the logfile name (default 'viewing')

  Returns
  -------
  pd.DataFrame
    indexed by timestamp, with columns 'tstamp' (float) and 'logdata'
    (the message text without its trailing newline)

  Raises
  ------
  OSError
    if the logfile cannot be opened
  ValueError
    if a non-blank line has fewer than three fields or a non-numeric timestamp
  """
  fpath = 'data/behav/silvy_buckets/sub%iday2/%i_%s.log'%(100+sub_num,sub_num,task)
  # tstamp -> message. Keyed by timestamp so that a repeated timestamp keeps
  # its first position but ends up with the last message, exactly like the
  # label-based row assignment this replaces.
  # NOTE(review): this means events sharing a timestamp collapse into one
  # row -- confirm that is intended.
  logdata_by_tstamp = {}
  # context manager so the file handle is always closed
  with open(fpath, "r") as f:
    for line in f:
      if not line.strip():
        # tolerate blank lines
        continue
      # maxsplit keeps any tabs inside the message itself
      tstamp,_loglevel,message = line.split('\t',2)
      # strip only the line terminator (the last line may not have one)
      logdata_by_tstamp[float(tstamp)] = message.rstrip('\n')
  # build the dataframe once instead of growing it row by row
  tstamps = list(logdata_by_tstamp)
  df = pd.DataFrame(
    {'tstamp':tstamps,'logdata':list(logdata_by_tstamp.values())},
    index=tstamps,
    columns=['tstamp','logdata'])
  return df.astype({'tstamp':float})

def reindex_and_TR_column(df,TR_rate=1.5,lag_TRs=3):
  """
  Align a log dataframe to the first scanner trigger and add a TR column.

  Timestamps are re-centered on the first row whose logdata is
  'Keypress: equal'; TR = ceil(tstamp/TR_rate); rows with a negative TR are
  dropped; rows are renumbered 0..n-1; finally `lag_TRs` is added to TR.
  The input dataframe is left unmodified.

  Parameters
  ----------
  df : pd.DataFrame
    needs a numeric 'tstamp' column and a 'logdata' column
  TR_rate : float
    divisor converting tstamp into TRs (default 1.5)
  lag_TRs : int
    number of TRs added to every TR as hemodynamic lag (default 3)

  Returns
  -------
  pd.DataFrame
    new dataframe with the input columns plus an integer 'TR' column

  Raises
  ------
  IndexError
    if no row has logdata 'Keypress: equal'
  """
  # first TR
  is_trigger = df['logdata']=='Keypress: equal'
  if not is_trigger.any():
    raise IndexError("no 'Keypress: equal' row in logdata; cannot locate first TR")
  first_TR_tstamp = df.loc[is_trigger,'tstamp'].iloc[0]
  # center tstamps on first TR
  tstamp = df['tstamp']-first_TR_tstamp
  # include TR column
  TR = np.ceil(tstamp/TR_rate).astype(int)
  # assign returns a new frame, so the caller's dataframe is not mutated
  out = df.assign(tstamp=tstamp,TR=TR)
  # remove negative TRs
  # NOTE: ceil maps tstamps in (-TR_rate, 0] to TR 0, so rows up to one TR
  # before the first trigger are kept
  out = out[out['TR']>-1].copy()
  # reindex by row number
  out.index = np.arange(len(out))
  # hemodynamic lag
  out['TR'] = out['TR']+lag_TRs
  return out

In [11]:
# Subject whose log is inspected here. Set explicitly so this cell does not
# depend on a `sub_num` left behind by another cell (running the notebook
# top-to-bottom would otherwise raise NameError).
sub_num = 33
logdf = read_logfile(sub_num)
logdf = reindex_and_TR_column(logdf)
logdf

Unnamed: 0,tstamp,logdata,TR
0,0.0000,Keypress: equal,3
1,0.0318,Imported subj33_day2_viewing.csv as conditions...,4
2,0.0322,"Created sequence: sequential, trialTypes=12, n...",4
3,0.0329,"New trial (rep=0, index=0): OrderedDict([('nor...",4
4,0.0338,"Created sequence: random, trialTypes=1, nReps=...",4
...,...,...,...
1814,1744.4280,Keypress: equal,1166
1815,1744.6777,Keypress: 2,1167
1816,1745.9283,Keypress: equal,1167
1817,1746.6092,northorsouth2: autoDraw = False,1168


In [5]:
# candidate subject numbers: 0, 1, ..., 44
ALL_SUB_NS = np.arange(0,45)

In [None]:
""" initialize viewing_df"""

In [6]:
""" 
COLLECT THE ONSET TR OF EVERY VIDEO ACROSS ALL SUBJECTS IN A LONG DATAFRAME
"""
# video labels as they appear in the log: the six base videos, then their
# '_q' counterparts
_base_vid_strL = ['vid1a','vid1b','vid2','vid3','vid4','vid5']
vid_strL = _base_vid_strL + [name+'_q' for name in _base_vid_strL]

timing_df_row_L = []
for sub_num in [33]:
  ## log of this subject, aligned to the first TR
  logdf = read_logfile(sub_num)
  df = reindex_and_TR_column(logdf)

  for vid_str in vid_strL:
    print('sub',sub_num,vid_str)
    ## TRs at which this video started being drawn
    TR_vals = df.loc[df.logdata == '%s: autoDraw = True'%vid_str,'TR'].values
    ## '_q' videos are labelled wed_num 0 and 11, all others wed_num 1..10;
    ## the DataFrame constructor raises if TR_vals has a different length
    wed_nums = [0,11] if vid_str.endswith('_q') else np.arange(1,11)
    n_rows = len(wed_nums)
    timing_df_row = pd.DataFrame({
      'sub_num':np.repeat(sub_num,n_rows),
      'vid_str':np.repeat(vid_str,n_rows),
      'wed_num':wed_nums,
      'onset_TR':TR_vals
    })
    timing_df_row_L.append(timing_df_row)

## stack the per-video frames into one long dataframe (row labels are not reset)
timing_df_cat = pd.concat(timing_df_row_L)
timing_df_cat.iloc[:200]

sub 33 vid1a
sub 33 vid1b
sub 33 vid2
sub 33 vid3
sub 33 vid4
sub 33 vid5
sub 33 vid1a_q
sub 33 vid1b_q
sub 33 vid2_q
sub 33 vid3_q
sub 33 vid4_q
sub 33 vid5_q


Unnamed: 0,sub_num,vid_str,wed_num,onset_TR
0,33,vid1a,1,109
1,33,vid1a,2,204
2,33,vid1a,3,301
3,33,vid1a,4,397
4,33,vid1a,5,493
5,33,vid1a,6,588
6,33,vid1a,7,684
7,33,vid1a,8,781
8,33,vid1a,9,877
9,33,vid1a,10,973


In [6]:
""" 
TAKE THE MEAN ACROSS SUBJECTS TO GET A DATAFRAME WITH A TR VALUE FOR EACH VIDEO OF EACH WEDDING
"""
# mean over rows sharing (wed_num, vid_str), floored to whole numbers;
# sub_num gets averaged along with onset_TR and is dropped at the end
timing_df = (
  timing_df_cat
  .groupby(['wed_num','vid_str'])
  .mean()
  .apply(np.floor)
  .astype(int)
  .reset_index()
  .drop(columns='sub_num')
)

In [8]:
# display up to the first 200 rows of timing_df
timing_df.iloc[:200]

Unnamed: 0,wed_num,vid_str,onset_TR
0,0,vid1a_q,12
1,0,vid1b_q,29
2,0,vid2_q,35
3,0,vid3_q,49
4,0,vid4_q,67
5,0,vid5_q,83
6,1,vid1a,111
7,1,vid1b,128
8,1,vid2,134
9,1,vid3,145


# include offset

In [9]:
"""
the first 26 seconds are intro, followed by 9 seconds start-event, 
17 seconds campfire or flower (depending on label in pkl), 
23 seconds coin or torch, 24 seconds egg or painting, and remainder gifts. 
"""

## NB CURRENTLY USING 10S AS PLACEHOLDER FOR FINAL EVENT 
TR_rate = 1.5
# duration of each video segment in seconds
_vid_len_secs = {'vid1a':26,'vid1b':9,'vid2':17,'vid3':23,'vid4':24,'vid5':10}
# duration in TRs, rounded to the nearest integer; each '_q' video gets the
# same length as its base video
vid_len_D = {}
for _suffix in ('','_q'):
  for _name,_secs in _vid_len_secs.items():
    vid_len_D[_name+_suffix] = np.round(_secs/TR_rate).astype(int)
vid_len_D

{'vid1a': 17,
 'vid1b': 6,
 'vid2': 11,
 'vid3': 15,
 'vid4': 16,
 'vid5': 7,
 'vid1a_q': 17,
 'vid1b_q': 6,
 'vid2_q': 11,
 'vid3_q': 15,
 'vid4_q': 16,
 'vid5_q': 7}

In [10]:
# look up each row's length (in TRs) by its video name; names missing from
# vid_len_D give NaN, hence the float column
timing_df['len_TRs'] = timing_df['vid_str'].map(vid_len_D).astype(float)
# offset = onset + length
timing_df['offset_TR'] = timing_df['onset_TR'] + timing_df['len_TRs']

In [11]:
# store all three TR columns as integers
_TR_cols = ['onset_TR','len_TRs','offset_TR']
timing_df = timing_df.astype({col:int for col in _TR_cols})

In [12]:
# display the finished table (onset, length and offset per wedding/video)
timing_df

Unnamed: 0,wed_num,vid_str,onset_TR,len_TRs,offset_TR
0,0,vid1a_q,12,17,29
1,0,vid1b_q,29,6,35
2,0,vid2_q,35,11,46
3,0,vid3_q,49,15,64
4,0,vid4_q,67,16,83
5,0,vid5_q,83,7,90
6,1,vid1a,111,17,128
7,1,vid1b,128,6,134
8,1,vid2,134,11,145
9,1,vid3,145,15,160


In [13]:
# Write timing_df to CSV. The row index is written as well (to_csv default),
# so the file starts with an unnamed index column. The 'deriv' directory must
# already exist; to_csv does not create it.
timing_df.to_csv('deriv/timing_df_viewing.csv')