In [1]:
from pathlib import *
import numpy as np
import pandas as pd

In [2]:
def get_subids(behavepath):
    subfiles="*study"
    sublist = []
    for filepathobj in behavepath.glob(subfiles):
        fname=filepathobj.name
        sub = fname[:3]
        sublist.append(sub)
    sublist.sort()
    return sublist

def set_behavior_path(sub, behavestring, extra):
    behaveobj=[behavestring+sub+extra]
    behavefilepath=Path(behaveobj[0])
    behavefilepath.exists()
    return behavefilepath

def read_study_file(filepath):
    """read in studyarray, turn into DataFrame and delete extra columns"""
    colnames=['obj1','obj2','obj3','cuecond','loc1','x1','y1','loc2','x2','y2','loc3','x3','y3',
     'dom_loc_rt','dom_loc_resp','cond','block','obj_type', 'dom_obj_id', 'dom_loc_actual', 'dom_choice_rt']
    studyarray=pd.read_table(filepath,header=None,names=colnames)
    tmpmask=~studyarray.columns.str.contains('tmp')
    studyarray=studyarray[studyarray.columns[tmpmask]]
    trialnum = np.arange(1,len(studyarray)+1)
    studyarray['studytrial'] = trialnum
    return studyarray

def read_test_file(filepath):
    """read in testarray, turn into DataFrame and delete extra columns"""
    colnames=['obj1','obj2','obj3','cuecond','loc1','x1','y1','loc2','x2','y2','loc3','x3','y3',
     'dom_loc_rt','dom_loc_resp','cond','block','obj_type', 'dom_obj_id', 'dom_loc_actual', 'dom_choice_rt',
             'tmp', 'tmp', 'test_obj_id', 'test_loc_id', 'tmp', 'tmp', 'tmp', 'tmp', 'test_resp', 'test_obj_rt',
             'test_resp_loc_id', 'test_loc_rt', 'conf', 'tmp']
    testarray=pd.read_table(filepath,header=None,names=colnames)
    tmpmask=~testarray.columns.str.contains('tmp')
    testarray=testarray[testarray.columns[tmpmask]]
    trialnum = np.arange(1,len(testarray)+1)
    testarray['testtrial'] = trialnum
    testarray['recog_accuracy'] = testarray['test_resp'] == 1
    testarray['recog_loc_accuracy'] = (testarray['test_resp_loc_id'] == testarray['loc1']) | (testarray['test_resp_loc_id'] == testarray['loc2']) | (testarray['test_resp_loc_id'] == testarray['loc3'])
    testarray['manip_accuracy'] = testarray['dom_loc_resp'] == testarray['dom_loc_actual']
    return testarray

# merge study & test
def merge_study_test(studyarray, testarray):
    # merge study & test
    testarray['tmp_obj1'] = testarray['obj1'] + (1000*testarray['obj_type'])
    testarray = testarray.set_index('tmp_obj1', drop=True)
    studyarray['tmp_obj1'] = studyarray['obj1'] + (1000*studyarray['obj_type'])
    studyarray.sort_values('tmp_obj1')
    studyarray = studyarray.set_index('tmp_obj1', drop=True)
    behavearray = studyarray.copy()

    for ind, ldf in testarray.iterrows():
        for col in ['manip_accuracy', 'recog_accuracy', 'recog_loc_accuracy', 'testtrial', 'test_obj_id']:
            behavearray.loc[ind, col] = ldf[col]
    behavearray.reset_index(inplace=True)
    behavearray.drop(columns = 'tmp_obj1', inplace=True)
    return behavearray

def adjust_pres_coords(array,x,y,xmax,ymax):
    """adjustment for behavioral coords to match
    eye coords for presentation version of exp"""
    newarray=pd.DataFrame()
    newarray[x]=array[x]+xmax
    newarray[y]=(array[y]-ymax)*-1
    return newarray

def apply_adjust_pres_coords(behavearray, sub):
    """applies adjust_pres_coords to all
    coords in behave array"""
           
    xs = [f'x{loc}' for loc in range(1,4)]
    ys = [f'y{loc}' for loc in range(1,4)]
    newlocs = pd.DataFrame()
    for loc in zip(xs, ys):
        x = loc[0]
        y = loc[1]
        newloc=adjust_pres_coords(behavearray,x,y, xmax, ymax)
        newlocs[x] = newloc[x]
        newlocs[y] = newloc[y]

    cols=newlocs.columns.tolist()
    for loc in cols:
        behavearray[loc]=newlocs[loc]
    return behavearray

In [3]:
# define manipulated object, tested object, and other object from obj1, obj2, obj3
def define_obj_types(behavearray):
    alldomarray=pd.DataFrame()
    alltestarray=pd.DataFrame()
    allotherarray=pd.DataFrame()

    objlist = ['obj1', 'obj2', 'obj3']
    nums=['1', '2', '3']

    for obj,num in zip(objlist,nums):
        domarray = behavearray[behavearray['dom_obj_id']==behavearray[obj]]
        domarray['manip_obj'] = obj
        domarray['manip_x'] = domarray[f'x{num}']
        domarray['manip_y'] = domarray[f'y{num}']   
        domarray['manip_loc'] = domarray[f'loc{num}']
        alldomarray = pd.concat([alldomarray,domarray])

        testarray = behavearray[behavearray['test_obj_id']==behavearray[obj]]
        testarray['test_obj'] = obj
        testarray['test_x'] = testarray[f'x{num}']
        testarray['test_y'] = testarray[f'y{num}']   
        testarray['test_loc'] = testarray[f'loc{num}']

        alltestarray = pd.concat([alltestarray,testarray])

        othermask = ((behavearray['test_obj_id']!=behavearray[obj]) & (behavearray['dom_obj_id']!=behavearray[obj]) 
                     & (behavearray['testtrial'].notnull()))
        otherarray = behavearray[othermask]
        otherarray['other_obj'] = obj
        otherarray['other_x'] = otherarray[f'x{num}']
        otherarray['other_y'] = otherarray[f'y{num}']   
        otherarray['other_loc'] = otherarray[f'loc{num}']

        allotherarray = pd.concat([allotherarray,otherarray])


    catchtrials = behavearray[behavearray['testtrial'].isnull()]

    alldomarray = pd.concat([alldomarray,catchtrials])
    alltestarray = pd.concat([alltestarray,catchtrials])
    allotherarray = pd.concat([allotherarray,catchtrials])

    alldomarray.sort_index(inplace=True)
    alltestarray.sort_index(inplace=True)
    allotherarray.sort_index(inplace=True)
    return alldomarray, alltestarray, allotherarray

def edit_obj_ids(behavearray, alldomarray, alltestarray, allotherarray):
    new_objs = ['manip', 'test', 'other']
    obj_arrays = [alldomarray, alltestarray, allotherarray]

    for obj,array in zip(new_objs, obj_arrays):
        behavearray[f'{obj}_obj'] = array[f'{obj}_obj']
        behavearray[f'{obj}_x'] = array[f'{obj}_x']
        behavearray[f'{obj}_y'] = array[f'{obj}_y']
        behavearray[f'{obj}_loc'] = array[f'{obj}_loc']
        
    droplist=[]
    for type in ['obj', 'loc', 'x', 'y']:
        drop = [f'{type}{num}' for num in range(1,4)]
        droplist += drop 

    behavearray.drop(columns=droplist, inplace=True)
    
    return behavearray

In [4]:
def parse_eye_filename(pathobject):
    fname=pathobject.name
    parts=fname.split(".")[0]
    subject=parts[:3]
    block=parts[3:4]
    subdict={"subject":subject, "block":block, "fname":fname}
    return subdict

def get_eye_files(subids,eyepath):
    """ returns master dataframe including eye file name, block, phase, subid
    input list of subject strings, Path object pointing to eye files
    """
    substrings=[s+"*.asc" for s in subids]
    subinfo=[]
    for s in substrings:
        for filepathobj in eyepath.glob(s):
            subdict=parse_eye_filename(filepathobj)
            subinfo.append(subdict)

    masterdf=pd.DataFrame(subinfo).sort_values(by=["subject","block"])
#     print(masterdf.head())
    masterdf=masterdf[["subject","block","fname"]]
    masterdf = masterdf[~masterdf['fname'].str.contains('i')]
    masterdf.index=range(len(masterdf))
    return masterdf

def parse_eye_events_to_intline(line,extrainfo):
    efixspace=["","",""]
    eblinkspace=efixspace*2
    newline=line.split()
    if "EFIX" in line:
        newline.extend(efixspace)
    elif "EBLINK" in line:
        newline.extend(eblinkspace)
    newline.extend(extrainfo)
    return newline

def parse_eye_line(eye_sub, eyestring):
    """ parses each line of eye file for a given eye_phase_sub
    input one phase type list of files for a subs
    and the path to the file (in form of a string)
    outputs dataframe with all events in table
    """
    etypes=('ESACC','EFIX','EBLINK')
    study=[]
    restudy=[]
    blocks=eye_sub.block
    fnames=eye_sub.fname
    subjects=eye_sub.subject
    trialnum=0
    print(eyestring)
    studyphase = False
    for block,fname,subject in zip(blocks,fnames,subjects):
        blocktrial=0
        path_file=eyestring+fname
        p=Path(path_file)
        with p.open() as f:
            for line in f:
                if "studypre" in line:
                    trialnum += 1
                    studyphase = True
                elif "studypost" in line:
                    studyphase = False
                elif "ERROR MESSAGES LOST" in line:
                    studyphase=True
                    trialnum += 1
                    
                if "START" in line:
                        
                    startline=line.split()
                    starttime=int(startline[1])


                if any(e in line for e in etypes):
                    extrainfo=[starttime,trialnum,block,subject]
                    newline=parse_eye_events_to_intline(line,extrainfo)
                    if studyphase:
                        study.append(newline)
                    else:
                        restudy.append(newline)
    return study, restudy


def events_to_df(events):
    """ change raw events to data DataFrame
    then and change values to numeric"""

    eye_events_df=pd.DataFrame(events)
    eye_events_df=eye_events_df.apply(pd.to_numeric,errors='ignore')
    headers=["event","eye","start","end","duration",
    "xstart","ystart","xend","yend","?","?","trialstart",
    "trialnum","block","sub"]
    eye_events_df.columns=headers
    return eye_events_df

def eventsdf_cleanup(eye_events_df):
    x=pd.DataFrame()
    """adjust trial start time, remove irrelevant values in fixation rows,
    and then delete excess columns"""

    eyedf_clean=eye_events_df.copy()

    eyedf_clean['start']=eyedf_clean['start']-eyedf_clean['trialstart']
    eyedf_clean['end']=eyedf_clean['end']-eyedf_clean['trialstart']

    efix_mask = (eyedf_clean["event"]=="EFIX")
    eyedf_clean.loc[efix_mask, 'xend'] = np.nan
    for col in ['xstart', 'ystart', 'xend', 'yend']:
        x = eyedf_clean[col]
        eyedf_clean[col] = pd.to_numeric(x, errors='coerce')


    del eyedf_clean['trialstart']
    del eyedf_clean['?']
    del eyedf_clean['eye']

    return eyedf_clean

In [5]:
def eye_behave_combo(eyearray,behavearray):
    eyebehave=eyearray.copy()
    eyecols=eyebehave.columns.tolist()
    behavecols=['manip_x','manip_y','test_x','test_y','other_x','other_y','cond', 'manip_accuracy', 'studytrial', 'testtrial',
                'recog_accuracy', 'recog_loc_accuracy', 'manip_accuracy', 'cuecond']
    allcols=eyecols+behavecols
    eyebehave=eyebehave.reindex(columns=allcols)
    order_col='studytrial'
    behavearray.sort_values(by=[order_col], inplace=True)
    behavearray.set_index(order_col, drop=False, inplace=True)
    for trial in range(0,behavearray.shape[0]):
        eyetrialevents=(eyebehave['trialnum']==trial+1)
        eyetrial=eyebehave.loc[eyetrialevents]
        for col in behavecols:
            eyetrial.loc[eyetrialevents,col]=behavearray.loc[trial+1,col]

        eyebehave.loc[eyetrialevents]=eyetrial
    return eyebehave


def dist(array,x1,y1,x2,y2):
    """ distance formula for columns of coords"""
    dx=array[x1]-array[x2]
    dy=array[y1]-array[y2]
    dist=np.sqrt(dx**2+dy**2)
    return dist

def calculate_dist(eyebehave,x1,y1,name):
    """ calculate distances for start and end eye locations"""
    for x in eyebehave:
        distdict={'manip_obj':dist(eyebehave,x1,y1,'manip_x','manip_y'),
                        'test_obj':dist(eyebehave,x1,y1,'test_x','test_y'),
                        'other_obj':dist(eyebehave,x1,y1,'other_x','other_y')}

    distarray=pd.DataFrame(distdict)
    col=distarray.columns.tolist()
    distarray.columns=[c+name for c in col]
    return distarray


def loc_view(eyebehave,distarray,name):
    distarray.idxmin(axis=1)
    mindistmask=distarray.min(axis=1)<180
    distmins=distarray.loc[mindistmask]

    distminlocs=distmins.idxmin(axis=1)
    eyebehave[name]="none"
    eyebehave.loc[mindistmask,name]=distminlocs
    return eyebehave

def screenview(x,y,xmax,ymax):
    screen='screen'
    if x>xmax:
        screen='offscreen'
    if x<(0):
        screen='offscreen'
    if y>ymax:
        screen='offscreen'
    if y<(0):
        screen='offscreen'
    return screen

def assign_screenview(eyebehavedict,xname,yname,name,xmax,ymax):
    colname=name+'loc'
    for loc in eyebehavedict:
        screen=screenview(loc[xname],loc[yname],xmax,ymax)
        if loc[colname]=='none':
            loc[colname]=screen
        if name !='end':
            continue
        if loc['event']=='EFIX':
            loc[colname]=np.nan
    return eyebehavedict


def adjust_fix_before_blink(eyebehavedict):
    """replace fixations <100 ms before blinks"""
    tmp_dict=eyebehavedict.copy()
    new_previous_events=[]
    for i,ind in enumerate(tmp_dict):
        current_event = ind
        if i>0:
            if current_event['event']=='EBLINK':
                if previous_event['trialnum']==current_event['trialnum']:
                    if previous_event['event']=='EFIX' and previous_event['duration']<100:
                        previous_event['event']='blink'
            new_previous_events.append(previous_event)
        previous_event=ind
    new_previous_events.append(previous_event)
    return new_previous_events

def adjust_event_after_blink(new_previous_events):
    new_post_events=[]
    new_events=new_previous_events.copy()
    flag=False
    for current_event in new_events:
        event_type=current_event['event']
        current_trial=current_event['trialnum']
        if flag==True and previous_trial==current_trial:
            if event_type=='ESACC':
                event_type='blink'
            elif event_type=='EFIX':
                if current_event['duration']<100:
                    event_type='blink'
        new_post_events.append(current_event)
        flag=(event_type=='EBLINK')
        previous_trial=current_trial
    return new_post_events

def eyedict_backto_df(new_post_events):
    corrected_eyedf=pd.DataFrame(new_post_events)
    old_blink_mask=corrected_eyedf['event']!='EBLINK'
    corrected_eyedf=corrected_eyedf[old_blink_mask]
    corrected_eyedf.sort_values(['block','trialnum','start'])
    corrected_eyedf=corrected_eyedf.reset_index(drop=True)
    corrected_eyedf['cuecond']=corrected_eyedf['cuecond'].map({1:1, 2:0})
    corrected_eyedf['all_accuracy'] = ((corrected_eyedf['manip_accuracy']) &
                              (corrected_eyedf['recog_accuracy']))
    corrected_eyedf['dom_accuracy'] = ((corrected_eyedf['all_accuracy']) & (corrected_eyedf['cuecond']))

    return corrected_eyedf

In [6]:
behavestring = '/Volumes/Voss_Lab/MRI/domchoicefmri_djb/behave.data/'
behavepath=Path(behavestring)
eyestring = '/Volumes/Voss_Lab/MRI/domchoicefmri_djb/eye.data/'
eyepath=Path(eyestring)

# run functions
subids = get_subids(behavepath)

for sub in ['804']:
    if sub == '918':
        continue
    if int(sub) < 900:
        xmax = 1920/2
        ymax = 1080/2
    else:
        xmax = 1280/2
        ymax = 1024/2
        
    # process behavior
    studyfilepath = set_behavior_path(sub, behavestring, 'study')
    studyarray = read_study_file(studyfilepath)

    testfilepath = set_behavior_path(sub, behavestring, 'test')
    testarray = read_test_file(testfilepath)
    # merge study & test
    behavearray = merge_study_test(studyarray, testarray)
 # adjust coords
    behavearray = apply_adjust_pres_coords(behavearray, sub)
# define manipulated object, tested object, and other object from obj1, obj2, obj3
    alldomarray, alltestarray, allotherarray = define_obj_types(behavearray)
# add in new object identities and remove old ones    
    print(sub)
    print(behavearray.shape)
    print(alldomarray.shape)
    print(alltestarray.shape)
    print(allotherarray.shape)
    behavearray = edit_obj_ids(behavearray, alldomarray, alltestarray, allotherarray)
    print(behavearray)
    # get eye files and process
    masterdf = get_eye_files(subids,eyepath)
    eye_sub = masterdf[masterdf['subject']==sub]
    study, restudy = parse_eye_line(eye_sub, eyestring)

    studydf = events_to_df(study)
    studyeyearray = eventsdf_cleanup(studydf)
    restudydf = events_to_df(restudy)
    restudeyearray = eventsdf_cleanup(restudydf)
    # concatenate study and restudy eye arrays
    studyeyearray['phase']='study'
    restudeyearray['phase']='restudy'
    eyearray = pd.concat([studyeyearray, restudeyearray], ignore_index=True)
    print(eyearray)
    # combine behavior & eye data
    eyebehave = eye_behave_combo(eyearray, behavearray)

    # calculate distances for start and end eye locations
    startdistarray=calculate_dist(eyebehave,x1='xstart',y1='ystart',name='start')
    enddistarray=calculate_dist(eyebehave,'xend','yend','end')

    # start & end locations
    eyebehave=loc_view(eyebehave,startdistarray,'startloc')
    eyebehave=loc_view(eyebehave,enddistarray,'endloc')

    #append start & end distances to eyebehave array
    eyebehave=pd.concat([eyebehave, startdistarray, enddistarray], axis=1)

    ''' change df to dict'''
    eyebehavedict=eyebehave.to_dict('records')
    ''' determine if non-loc viewing was on screen or offscreen'''
    eyebehavedict=assign_screenview(eyebehavedict,'xstart','ystart','start',xmax, ymax)
    eyebehavedict=assign_screenview(eyebehavedict,'xend','yend','end',xmax, ymax)

    '''adjust artifacts in eye data due to blinks'''
    new_previous_events=adjust_fix_before_blink(eyebehavedict)
    corrected_eye_events=adjust_event_after_blink(new_previous_events)

    '''put data back in df and remove old blinks'''
    subcleandf=eyedict_backto_df(corrected_eye_events)
    subcleandf['sub'] = sub

    fname='data/'+sub+'eyebehave.csv'
    subcleandf.to_csv(fname)
    print(sub, 'is done!')

  return _read(filepath_or_buffer, kwds)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.

804
(144, 27)
(144, 31)
(144, 31)
(144, 31)
     cuecond  dom_loc_rt  dom_loc_resp  cond  block  obj_type  dom_obj_id  \
0          1          72             8     1      1         3          93   
1          1          87             3     1      1         3         146   
2          1          90            11     2      1         3         206   
3          2          85            18     2      1         3          71   
4          1          68             2     1      1         3         184   
5          1          53            11     1      1         3          85   
6          2          74             7     1      1         3         188   
7          1          82            11     2      1         3         158   
8          2          95             4     2      1         3          15   
9          2          55             3     1      1         3           1   
10         1         158             9     0      1         3           0   
11         2          92        

/Volumes/Voss_Lab/MRI/domchoicefmri_djb/eye.data/
      event  start   end  duration  xstart  ystart    xend   yend  trialnum  \
0      EFIX      4   248       246   795.3   288.8     NaN    NaN         1   
1     ESACC    250   272        24   790.2   305.4   767.8  439.8         1   
2      EFIX    274   768       496   755.6   474.6     NaN    NaN         1   
3     ESACC    770   814        46   764.2   478.5  1240.2  225.1         1   
4      EFIX    816  1086       272  1238.8   228.2     NaN    NaN         1   
5     ESACC   1088  1098        12  1239.6   227.2  1257.7  211.2         1   
6      EFIX   1100  1410       312  1279.7   215.1     NaN    NaN         1   
7     ESACC   1412  1450        40  1285.0   227.9  1406.2  498.6         1   
8      EFIX   1452  1674       224  1411.3   527.4     NaN    NaN         1   
9     ESACC   1676  1696        22  1417.6   537.9  1465.4  620.8         1   
10     EFIX   1698  1880       184  1476.1   640.4     NaN    NaN         1   
11

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


804 is done!


In [7]:
def get_subids(behavepath):
    subfiles="*eyebehave.csv"
    sublist = []
    for filepathobj in behavepath.glob(subfiles):
        fname=filepathobj.name
        sub = fname[:3]
        sublist.append(sub)
    sublist.sort()
    return sublist

In [8]:
datastring = 'data/'
datapath = Path(datastring)
subids = get_subids(datapath)

alldata =pd.DataFrame()
for sub in subids:
    fname = datastring + sub + "eyebehave.csv"
    subdata = pd.read_csv(fname, index_col=0)
    alldata = alldata.append(subdata)
subcleandf = alldata[alldata['cond']>0]    
subcleandf.reset_index(drop=True, inplace=True)

In [9]:
subcleandf

Unnamed: 0,all_accuracy,block,cond,cuecond,dom_accuracy,duration,end,endloc,event,manip_accuracy,...,x1,x2,x3,xend,xstart,y1,y2,y3,yend,ystart
0,True,1,1.0,0.0,False,64,66,,blink,True,...,,,,,960.6,,,,,318.8
1,True,1,1.0,0.0,False,120,186,offscreen,ESACC,True,...,,,,976.1,958.7,,,,325.5,344.0
2,True,1,1.0,0.0,False,142,328,,EFIX,True,...,,,,,969.5,,,,,295.1
3,True,1,1.0,0.0,False,44,372,manip_objend,ESACC,True,...,,,,458.4,967.2,,,,175.2,286.9
4,True,1,1.0,0.0,False,146,518,,EFIX,True,...,,,,,443.6,,,,,170.3
5,True,1,1.0,0.0,False,12,530,manip_objend,ESACC,True,...,,,,406.6,435.1,,,,173.2,171.9
6,True,1,1.0,0.0,False,604,1134,,EFIX,True,...,,,,,401.4,,,,,181.1
7,True,1,1.0,0.0,False,50,1184,test_objend,ESACC,True,...,,,,792.1,410.6,,,,594.1,188.0
8,True,1,1.0,0.0,False,162,1346,,EFIX,True,...,,,,,800.3,,,,,610.4
9,True,1,1.0,0.0,False,14,1360,test_objend,ESACC,True,...,,,,836.8,805.0,,,,634.6,614.9
