# purpose: read in and transform the data from experiment 2

tracker
- for each subject, read in the raw EyeLink files (.txt) containing the pupil and gaze data
- transform the long format (time x data) into wide format (trial x time)
- add tags for each trial (subjectID, trial#, block#)
- combine the files from all subjects into mega csv sheets and save, resulting in 2 gaze csv files (x and y channels) and 1 pupil csv file

behavior
- read in the PsychoPy files (i.e. the behavioral output)
- select only the trials from the experimental blocks; exclude practice trials
- combine the files from all subjects into a mega csv sheet and save, resulting in 1 behavioral csv file

consent
- read in the consent info; get the subjects' mean age and gender counts

up next: cleanELPup.ipynb

# imports and functions

In [3]:
import warnings
warnings.filterwarnings("ignore")
#for handling RAM
import gc 

#stats
from statsmodels.stats.anova import AnovaRM
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
import pingouin as pg


#my to go packages
import math
import random
import pandas as pd
import numpy as np

#plotting
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.gridspec import GridSpec
from matplotlib import cm


#for readin
import glob # Package for Unix-style pathname pattern expansion
import os   # Python operating system interface

#for signal processing
from scipy.interpolate import CubicSpline,interp1d
import scipy.signal as signal
import scipy.stats as stats
from scipy.signal import filtfilt, butter
from scipy.fft import rfft, rfftfreq
from scipy.stats import sem 
from scipy.stats import norm
from scipy.stats import ttest_rel

#other packages
import ast
import itertools
from itertools import groupby
from more_itertools import consecutive_groups
import more_itertools as mit
from operator import itemgetter
import statistics
from sklearn.mixture import GaussianMixture



# load params

In [4]:
# let pandas print up to 500 columns before it starts eliding them
pd.options.display.max_columns = 500


In [6]:
#root folders of the raw data; each one holds one sub-folder per subject id
local_username = 'yud070'
pathEL = 'Z:/LiteBrite_YueyingDong/22AK01ELRaw_tracker/'
pathpy = 'Z:/LiteBrite_YueyingDong/22AK01ELRaw_psychopy/'

#list of subject id = names of the sub-folders under the tracker root.
#Only directories are kept: a stray file in the root would otherwise be treated
#as a subject and break the per-subject glob later on. The list is sorted because
#os.listdir returns entries in arbitrary order, so the processing order (and the
#row order of the combined tables) is reproducible across runs.
readIn = sorted(
    name for name in os.listdir(pathEL)
    if os.path.isdir(os.path.join(pathEL, name))
)

In [None]:
#columns pulled from each eyelink export when it is read in (passed as usecols)
relevantCols = [
    'RECORDING_SESSION_LABEL',
    'AVERAGE_GAZE_X',
    'AVERAGE_GAZE_Y',
    'AVERAGE_PUPIL_SIZE',
    'EYE_TRACKED',
    'IP_LABEL',
    'IP_START_TIME',
    'LEFT_PUPIL_SIZE',
    'RIGHT_PUPIL_SIZE',
    'SAMPLE_MESSAGE',
    'TIMESTAMP',
    'TRIALID',
]

#subset of the columns above that concerns the pupil signal
pupCols = [
    'EYE_TRACKED',
    'LEFT_PUPIL_SIZE',
    'RIGHT_PUPIL_SIZE',
    'AVERAGE_PUPIL_SIZE',
    'IP_LABEL',
    'TIMESTAMP',
    'TRIALID',
]

In [8]:
#timing params
#event onsets within a trial, in samples/ms from trial start:
#stim onset, precue rest, retrocue, postcue delay, probe cue, probe
ts = [1000,1250,1750,2250,4250,4750]

#the same boundaries in seconds, relative to stim onset; every value is the end
#of the named epoch (= previous boundary + the duration of that epoch)
stim_ts = 0.25
precue_ts = 0.5 + stim_ts
retrocue_ts = 0.5 + precue_ts
post_cue_ts = 2 + retrocue_ts
probe_cue = 0.5 + post_cue_ts
#the probe lasts 3 s and starts when the probe cue ends, so it is chained from
#probe_cue (it was chained from post_cue_ts, which ended the probe 0.5 s early
#and disagreed with the probe -> ITI interval in eventIndex: 4750 -> 7750)
probe_ts = 3 + probe_cue

#onset of each epoch (s from stim onset) and the matching epoch labels
epoch_array = np.array([0,stim_ts,precue_ts,retrocue_ts,post_cue_ts,probe_cue])
epoch_name = ['stim','rest','retrocue','rest','probe_cue','probe']


In [9]:
#display the epoch boundary array as a sanity check
epoch_array

array([0.  , 0.25, 0.75, 1.25, 3.25, 3.75])

In [None]:
#subjects for whom a single eye is used instead of the binocular average
leftEyeSubj = ['173', '175']
rightEyeSubj = ['183', '185']

#names of the events inside a trial, in order of occurrence ...
events = [
    'fixateRest',
    'stim',
    'precueRest',
    'retrocue',
    'postcueRest',
    'probecue',
    'probe',
    'ITI',
    'trialEnd',
]
#... and the index at which each of them is expected, one entry per event name
eventIndex = [0, 1000, 1250, 1750, 2250, 4250, 4750, 7750, 8250]

In [None]:
#--- reading in ---
#only the first [0,5]s of each recording is kept. The period of interest is
#[0.5,4.75]s; the extra padding is there so that interpolation behaves better.
included = 5000 #number of samples kept per trial (5000ms)

#--- cleaning ---
freq = 1000
sdThreshold = 6
pltAll = True

#gap length limits, and the lower limit converted to a sample count
minGapLen = 0.0
maxGapLen = 0.5
gapMinN = minGapLen*freq

#one slot per entry of readIn, initialised to zero
interpCorrection = np.zeros(len(readIn))

In [None]:
#empty tables that the per-subject results get appended to further down
#behavioral (psychopy) trials
psyFull = pd.DataFrame()

#pupil, gaze x, gaze y and event timings; each name gets its own empty frame
(pupFullTrial_raw,
 saccFullTrial_x_raw,
 saccFullTrial_y_raw,
 eventDf) = (pd.DataFrame() for _ in range(4))

In [None]:
#colors used for plotting
#general palette
cList = [
    '#fda48c',
    '#f23908',
    '#93648f',
    '#33202a',
    '#66b3ba',
    '#03838f',
]
#color per condition; the keys are the string form of a one-element list
cDict = {
    "['high']": '#f23908',
    "['medium']": '#03838f',
    "['low']": '#593247',
}


# read eyelink file

In [None]:
#read in the raw eyelink export of every subject and reshape it to trial x time

def _toTrialBySample(samples, valueCol):
    """Return `valueCol` of the long-format `samples` as a trial x sample table.

    Rows are trials (TRIALID) and columns are the within-trial sample counter
    'x'. Only the TRIALID column plus the first `included` sample columns are
    kept (the cut is positional).

    NOTE(review): pivot_table is called with its default dropna=True, which is
    documented to leave out all-NaN columns; a trial without a single valid
    gaze sample may therefore be absent from the gaze tables while still being
    present in the pupil table -- confirm that downstream code aligns the
    tables on TRIALID rather than on row position.
    """
    wide = pd.pivot_table(samples[['x', valueCol, 'TRIALID']], values = valueCol,
                          columns = 'x', index = 'TRIALID')
    return wide.reset_index().iloc[:, :included+1]


#per-subject tables are collected here and concatenated once after the loop;
#concatenating inside the loop would re-copy the growing tables every iteration
pupFrames, saccXFrames, saccYFrames, eventFrames = [], [], [], []

for f in readIn:

    # section1: read in
    f = str(f)
    print('start processing: ' + f)

    #read in the eyelink file (the first .txt found in the subject's folder)
    elFiles = glob.glob(pathEL+ f + '/*.txt')
    if not elFiles:
        #fail with the offending folder instead of a bare IndexError
        raise FileNotFoundError('no eyelink .txt file found in ' + pathEL + f)
    el = pd.read_csv(elFiles[0], delimiter = "\t", usecols = relevantCols)

    # manually correct for 149; during the experiment the id was wrongly entered as 148.
    # NOTE(review): this rewrites every '148' inside TRIALID, not only the subject
    # part -- confirm no other field of the id can contain '148'.
    if f == '149':
        el['TRIALID'] = [i.replace('148','149') for i in el['TRIALID']]

    print('     readin: ' + f)

    #decide which eye to use, default use avg pupil size from both eyes;
    #the hard-coded subject lists win over what the file itself reports
    selectEye = 'AVERAGE_PUPIL_SIZE'
    if f in leftEyeSubj:
        selectEye = 'LEFT_PUPIL_SIZE'
    elif f in rightEyeSubj:
        selectEye = 'RIGHT_PUPIL_SIZE'
    elif el.EYE_TRACKED.iloc[0] != 'Binocular':
        selectEye = el.EYE_TRACKED.iloc[0].upper()+ '_PUPIL_SIZE'

    #after reading in el file, transform to trial * timestamp structure
    #only the samples of the 'fullTrial' interest period are used
    pseudodf = el[(el.IP_LABEL == 'fullTrial')]
    pseudodf = pseudodf.rename(columns={selectEye:'diameter_3d','TIMESTAMP':'pupil_timestamp'}).reset_index(drop = True)
    #'.' entries become 0 for the pupil trace but NaN for the gaze traces
    pseudodf['diameter_3d'] = pd.to_numeric(pseudodf['diameter_3d'].replace('.',0))
    pseudodf['AVERAGE_GAZE_X'] = pd.to_numeric(pseudodf['AVERAGE_GAZE_X'].replace('.',np.nan))
    pseudodf['AVERAGE_GAZE_Y'] = pd.to_numeric(pseudodf['AVERAGE_GAZE_Y'].replace('.',np.nan))
    pseudodf['eventOnset'] = pseudodf.SAMPLE_MESSAGE.astype(str) #prepare for index for  epoching
    pseudodf['x'] = pseudodf.groupby('TRIALID').cumcount() #sample counter within each trial, used as column key

    #raw pup and sacc in wide format
    pupRaw = _toTrialBySample(pseudodf, 'diameter_3d')
    saccAllT_X = _toTrialBySample(pseudodf, 'AVERAGE_GAZE_X')
    saccAllT_Y = _toTrialBySample(pseudodf, 'AVERAGE_GAZE_Y')

    #extract event onset time: for every trial, the 1-based positions of the
    #samples whose message is neither '.' nor missing
    eventOnset = pseudodf.pivot(values = 'eventOnset',columns = 'x',index = 'TRIALID').reset_index()
    eventOnset_subj = eventOnset.iloc[:,1:].apply(lambda x: np.where((x !='.')&(~x.isnull()))[0],axis = 1) +1
    #stack the per-trial positions into a trial x event table; this needs every
    #trial to carry as many messages as the first one, otherwise reshape raises
    eventDf_subj = pd.DataFrame(np.concatenate(eventOnset_subj).reshape(len(eventOnset_subj), len(eventOnset_subj[0])),
                 columns = events[:len(eventOnset_subj[0])])
    eventDf_subj.insert(loc = 0,column = 'TRIALID',value = eventOnset.TRIALID)

    #concate
    print('     start concate: ' + f)

    #pupil.
    pupFrames.append(pupRaw)
    #gaze.
    saccXFrames.append(saccAllT_X)
    saccYFrames.append(saccAllT_Y)
    #event.
    eventFrames.append(eventDf_subj)

#append everything collected above to the running tables in one go
pupFullTrial_raw = pd.concat([pupFullTrial_raw, *pupFrames])
saccFullTrial_x_raw = pd.concat([saccFullTrial_x_raw, *saccXFrames])
saccFullTrial_y_raw = pd.concat([saccFullTrial_y_raw, *saccYFrames])
eventDf = pd.concat([eventDf, *eventFrames])


# read psychopy file

In [None]:
#read in the psychopy (behavioral) csv of every subject and stack them in psyFull
psyFrames = [] #one filtered table per subject, concatenated once after the loop

for f in readIn:
    f = str(f)
    print('start processing' + f)

    #the first .csv found in the subject's folder
    psyFiles = glob.glob(pathpy+f+ '/*.csv')
    if not psyFiles:
        #fail with the offending folder instead of a bare IndexError
        raise FileNotFoundError('no psychopy .csv file found in ' + pathpy + f)
    trlInfo = pd.read_csv(psyFiles[0])

    #reject the practice trials, only keep the real trials:
    #rows need an identifier, and must not be flagged as practice. Some files
    #have no is_practice column, in which case only the identifier is checked.
    keepRow = trlInfo.identifier.notnull()
    if 'is_practice' in trlInfo.columns:
        #elementwise "!= True", so rows with a missing flag are kept as well
        keepRow = trlInfo.is_practice.ne(True) & keepRow
    #copy so that the corrections below write to an independent table
    trlInfo = trlInfo[keepRow].copy()

    #manually correct for subject 149 because their id was entered wrongly.
    if f == '149':
        trlInfo['identifier'] = [i.replace('148','149') for i in trlInfo.identifier]
        trlInfo['participant'] = 149.0
    psyFrames.append(trlInfo)

#a single concat avoids re-copying the growing table for every subject;
#fall back to an empty table when there was nothing to read
psyFull = pd.concat(psyFrames) if psyFrames else pd.DataFrame()

psyFull = psyFull.drop_duplicates()

## consent

In [None]:
#load the consent table and keep only the rows whose id appears as a
#participant in the behavioral data
consentInfo = pd.read_csv('Z:/LiteBrite_YueyingDong/data4Paper/consentInfo.csv')
_isParticipant = consentInfo['id'].isin(psyFull['participant'].unique())
consentInfo = consentInfo.loc[_isParticipant].reset_index(drop = True)

In [None]:
#number of subjects per gender entry
consentInfo.gender.value_counts()

In [None]:
#mean and standard deviation of the subjects' age
(consentInfo['age'].mean(), consentInfo['age'].std())

# save 


In [None]:
#tag appended to every output file name, typed in at run time, e.g. '148to169'
subjs = input()
#root folder that receives the unparsed (raw, wide-format) tables
pathUnparsed = 'Z:/LiteBrite_YueyingDong/temp/unparsed/'

In [None]:
#write every combined table to its own sub-folder of pathUnparsed.
#Each entry: (table, sub-folder, file name stem); the file is <stem><subjs>.csv.
#The gaze stems have no trailing underscore before the subject tag, unlike the
#pupil and event stems; they are left untouched so the file names do not change.
_outputs = [
    (pupFullTrial_raw, 'pupRaw/', 'pupUnparsed_raw_'),       #pupil
    (saccFullTrial_x_raw, 'saccX/', 'saccUnparsed_X_raw'),   #gaze x, unmasked raw gaze
    (saccFullTrial_y_raw, 'saccY/', 'saccUnparsed_Y_raw'),   #gaze y, unmasked raw gaze
    (eventDf, 'event/', 'eventUnparsed_'),                   #event timings
]

for _table, _subDir, _stem in _outputs:
    #create the target folder if needed, so a missing folder cannot abort the
    #save after all the processing has already been done
    os.makedirs(pathUnparsed + _subDir, exist_ok = True)
    _table.to_csv(pathUnparsed + _subDir + _stem + subjs + '.csv')