In [39]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import *

In [40]:
def set_behavior_path(sub, behavestring):
    extra='recogarray.txt'
    behaveobj=[behavestring+sub+extra]
    behavepath=Path(behaveobj[0])
    behavepath.exists()
    return behavepath

def set_times_path(sub, behavestring):
    timesarrayextra='refreshtimes.txt'
    timesarrayobj=[behavestring+sub+timesarrayextra]
    timesarray_path=Path(timesarrayobj[0])
    timesarray_path.exists()
    return timesarray_path


def get_refresh_all(subids,pathstring):

    eyepath=Path(pathstring)
    if not eyepath.exists():
        print("can't find path, check connection!!")
        quit()

    #TODO: get the other files later
    masternames=get_eye_files(subids,eyepath)
    # study_all=masternames[masternames['phase']=="a"]
    refresh_all=masternames[masternames['phase']=="b"]
    # recog_all=masternames[masternames['phase']=="c"]
    return refresh_all

def load_data_for_subject(sub, refresh_all, pathstring, behavestring, is_pres=True):
    refresh_sub=[]
    refresh_sub=refresh_all[refresh_all['subject']==sub]
    eyearray = read_in_eye_data(refresh_sub,pathstring)
    if not len(eyearray):
        print('eyearray is empty!')


    behavepath = set_behavior_path(sub, behavestring)
    timesarray_path = set_times_path(sub, behavestring)

    behavearray=read_behave_file(behavepath)
    print('len(behavearray)', len(behavearray))
    #apply coordinate change to behavioral data if True in subdict
    if is_pres:
        behavearray=apply_adjust_pres_coords(behavearray)
        timesarray=read_times_file_pres(timesarray_path)
    else:
        timesarray=read_times_file_mat(timesarray_path)

    print('len(timesarray)', len(timesarray))
    return eyearray,behavearray,timesarray


In [41]:
def parse_eye_filename(pathobject):
    fname=pathobject.name
    parts=fname.split(".")[0]
    subject=parts[:5]
    other=parts[5:]
    has_r="r" in other
    if has_r:
        other=other.replace("r","")
    try:
        block=int(other[0])
        phase=other[1]
    except:
        block=int(other[1])
        phase=other[0]
    subdict={"subject":subject, "phase":phase,"block":block, "fname":fname}
    return subdict



def get_eye_files(subids,eyepath):
    """ returns master dataframe including eye file name, block, phase, subid
    input list of subject strings, Path object pointing to eye files
    """
    print(subids)
    substrings=[s+"*.asc" for s in subids]
    subinfo=[]
    for s in substrings:
        for filepathobj in eyepath.glob(s):
            subdict=parse_eye_filename(filepathobj)
            subinfo.append(subdict)

    masterdf=pd.DataFrame(subinfo).sort_values(by=["subject","phase","block"])
    print(masterdf.head())
    masterdf=masterdf[["subject","phase","block","fname"]]
    masterdf.index=range(len(masterdf))
    return masterdf


def parse_eye_events_to_intline(line,extrainfo):
    efixspace=["","",""]
    eblinkspace=efixspace*2
    newline=line.split()
    if "EFIX" in line:
        newline.extend(efixspace)
    elif "EBLINK" in line:
        newline.extend(eblinkspace)
    newline.extend(extrainfo)
    return newline


def parse_eye_line(phase_sub,pathstring):
    """ parses each line of eye file for a given phase_sub
    input one phase type list of files for a subs
    and the path to the file (in form of a string)
    outputs dataframe with all events in table
    """
    etypes=('ESACC','EFIX','EBLINK')
    events=[]
    blocks=phase_sub.block
    fnames=phase_sub.fname
    subjects=phase_sub.subject
    trialnum=0

    for block,fname,subject in zip(blocks,fnames,subjects):
        path_file=pathstring+fname
        p=Path(path_file)
        with p.open() as f:
            for line in f:
                if "START" in line:
                    trialnum=trialnum+1
                    startline=line.split()
                    starttime=int(startline[1])
                if any(e in line for e in etypes):
                    extrainfo=[starttime,trialnum,block,subject]
                    newline=parse_eye_events_to_intline(line,extrainfo)
                    events.append(newline)
            print(trialnum, block)    
    return events

def events_to_df(events):
    """ change raw events to data DataFrame
    then and change values to numeric"""

    eye_events_df=pd.DataFrame(events)
    eye_events_df=eye_events_df.apply(pd.to_numeric,errors='ignore')
    headers=["event","eye","start","end","duration",
    "xstart","ystart","xend","yend","?","?","trialstart",
    "trialnum","block","sub"]
    eye_events_df.columns=headers
    return eye_events_df

def eventsdf_cleanup(eye_events_df):
    """adjust trial start time, remove irrelevant values in fixation rows,
    and then delete excess columns"""

    eyedf_clean=eye_events_df.copy()

    eyedf_clean['start']=eyedf_clean['start']-eyedf_clean['trialstart']
    eyedf_clean['end']=eyedf_clean['end']-eyedf_clean['trialstart']

    efix_mask = (eyedf_clean["event"]=="EFIX")
    eyedf_clean.loc[efix_mask, 'xend'] = np.nan

    del eyedf_clean['trialstart']
    del eyedf_clean['?']
    del eyedf_clean['eye']

    return eyedf_clean

def read_in_eye_data(refresh_sub,pathstring):
    eye_events=parse_eye_line(refresh_sub,pathstring)
    eyedf=events_to_df(eye_events)
    eyearray=eventsdf_cleanup(eyedf)
    return eyearray

In [42]:
def read_behave_file(filepath):
    """read in behavearray, turn into DataFrame and delete extra columns"""
    colnames=['loc1x','loc1y','tmpx','tmpy','tmpdist','tmpmaxdist','tmpdistused','block','angle','loc3x','loc3y','loc2x','loc2y',
         'loc1-loc2dist','loc1-loc3dist','loc2-loc3dist','picid','contextid','cond',
         'study order','refresh order','recog order','same/diff','same/diff rt',
          'recog button', 'recog loc','recog rt','tmp']
    behavearray=pd.read_table(filepath,header=None,names=colnames)
    tmpmask=~behavearray.columns.str.contains('tmp')
    behavearray=behavearray[behavearray.columns[tmpmask]]
    return behavearray


def adjust_pres_coords(array,x,y,xmax=1920/2,ymax=1080/2):
    """adjustment for behavioral coords to match
    eye coords for presentation version of exp"""
    newarray=pd.DataFrame()
    newarray[x]=array[x]+xmax
    newarray[y]=(array[y]-ymax)*-1
    return newarray

def apply_adjust_pres_coords(behavearray):
    """applies adjust_pres_coords to all
    coords in behave array"""
    newloc1=adjust_pres_coords(behavearray,'loc1x','loc1y')
    newloc2=adjust_pres_coords(behavearray,'loc2x','loc2y')
    newloc3=adjust_pres_coords(behavearray,'loc3x','loc3y')
    newlocs=pd.concat([newloc1,newloc2,newloc3],axis=1)

    cols=newlocs.columns.tolist()
    for loc in cols:
        behavearray[loc]=newlocs[loc]
    return behavearray

def read_times_file_pres(timespath):
    timecolnames=['global trial start','objonset','trialend']
    timesdf=pd.read_table(timespath,header=None, names=timecolnames, index_col=False)
    print(timesdf.head())
    del timesdf['global trial start']
    return timesdf

def read_times_file_mat(timespath):
    print('running',timespath)
    timecolnames=['tmp1', 'tmp2', 'objonset','tmp3', 'tmp4']
    timesdf=pd.read_table(timespath,header=None,names=timecolnames, index_col=False)
    print(timesdf.head())
    to_delete=['tmp1', 'tmp2', 'tmp3', 'tmp4']
    for tmp in to_delete:
        del timesdf[tmp]
    timesdf['trialend']=np.nan
    return timesdf

In [43]:
def run_all():
    # subids=["ec105","ec106","ec107","ec108"]
    subids=["ec108"]
    matlab_subs = ["ec105", "ec106"]
    pathstring='/Volumes/Voss_Lab/ECOG/ecog/locationspace/ecog.eye/'
    behavestring='/Volumes/Voss_Lab/ECOG/ecog/locationspace/ecog.behave/'

    refresh_all = get_refresh_all(subids,pathstring)

    for sub in subids:
        is_pres = (sub not in matlab_subs)
        print('running', sub, 'using presentation', is_pres)
        output=load_data_for_subject(sub, refresh_all, pathstring, behavestring, is_pres)
    return output
eyearray,behavearray,timesarray=run_all()
#         preprocess_subject_dfs(sub, *output)
#         print(sub, 'is done!')

['ec108']
    block         fname phase subject
19      1  ec108ar1.asc     a   ec108
22      2  ec108ar2.asc     a   ec108
16      3  ec108ar3.asc     a   ec108
23      4  ec108ar4.asc     a   ec108
1       5  ec108ar5.asc     a   ec108
running ec108 using presentation True
16 1
32 2
48 3
64 4
80 5
96 6
112 7
128 8
len(behavearray) 128
   global trial start  objonset  trialend
0              280547      2033      8703
1              289349      2033      8503
2              297955      2030      9268
3              307325      2029      8398
4              315823      2034      8372
len(timesarray) 128


In [18]:
behavearray

Unnamed: 0_level_0,loc1x,loc1y,block,angle,loc3x,loc3y,loc2x,loc2y,loc1-loc2dist,loc1-loc3dist,...,picid,contextid,cond,study order,recog order,same/diff,same/diff rt,recog button,recog loc,recog rt
refresh order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,537.0,943.0,1,60,239.0,277.0,964.0,352.0,730,730,...,19,7,1,16,10,2,81,7,1,25
2,1201.0,155.0,1,-60,1461.0,622.0,1736.0,164.0,536,536,...,24,7,2,1,13,1,70,6,1,26
3,423.0,281.0,1,60,375.0,595.0,126.0,396.0,319,319,...,18,7,1,13,7,3,115,8,1,46
4,1542.0,737.0,1,60,897.0,957.0,1028.0,288.0,683,683,...,79,7,2,7,1,1,63,8,1,26
5,841.0,778.0,1,60,1182.0,123.0,1578.0,746.0,738,738,...,108,7,1,6,8,2,61,6,1,26
6,592.0,500.0,1,-60,841.0,945.0,1102.0,508.0,511,511,...,54,7,2,4,15,1,100,6,1,29
7,1078.0,672.0,1,-60,1539.0,945.0,1545.0,410.0,536,536,...,34,7,1,14,9,3,88,6,1,32
8,625.0,946.0,1,60,301.0,684.0,689.0,534.0,417,417,...,32,7,1,10,2,2,68,7,1,58
9,113.0,435.0,1,60,771.0,128.0,707.0,851.0,726,726,...,48,7,2,2,6,1,55,6,1,36
10,700.0,394.0,1,60,809.0,892.0,322.0,737.0,511,511,...,92,7,1,8,14,1,90,7,1,33


In [51]:
# def eye_behave_combo(eyearray,behavearray,timesarray):
eyebehave=eyearray.copy()
eyecols=eyebehave.columns.tolist()
behavecols=['loc1x','loc1y','loc2x','loc2y','loc3x','loc3y','recog loc','same/diff','cond']
allcols=eyecols+behavecols+['objonset','trialend']
eyebehave=eyebehave.reindex(columns=allcols)
order_col='refresh order'
behavearray.sort_values(by=[order_col], inplace=True)
behavearray.set_index(order_col, inplace=True)
for trial in range(0,behavearray.shape[0]):
    eyetrialevents=(eyebehave['trialnum']==trial+1)
    eyetrial=eyebehave.loc[eyetrialevents]

    for col in behavecols:
        eyetrial.loc[eyetrialevents,col]=behavearray.loc[trial+1,col]

    objonsetmask=timesarray.index==trial
    onsettrial=timesarray.loc[objonsetmask]
    eyetrial.loc[eyetrialevents,'objonset']=onsettrial.iloc[0]['objonset']
    eyetrial.loc[eyetrialevents,'trialend']=onsettrial.iloc[0]['trialend']

    eyebehave.loc[eyetrialevents]=eyetrial
eyebehave

KeyError: 'refresh order'

In [10]:
fix_num=pd.DataFrame(fix.groupby(['sub', 'block', 'trialnum', 'cond', 'startloc', 'recog loc', 'same/diff'])['count'].sum())
fix_num.reset_index(inplace=True)
fix_num.head()

Unnamed: 0,sub,block,trialnum,cond,startloc,recog loc,same/diff,count
0,ec105,1,2,2.0,loc1start,1.0,1.0,4
1,ec105,1,3,2.0,loc1start,1.0,1.0,2
2,ec105,1,4,2.0,loc1start,1.0,1.0,1
3,ec105,1,5,2.0,loc1start,2.0,1.0,2
4,ec105,1,6,1.0,loc1start,1.0,2.0,1
