# Import and extract gaze data
`3.gaze-data_import`

Import gaze data from the eye-tracking software, and incorporate trial outcomes into homogenized dataset

In [1]:
import pandas as pd

from pathlib import Path
from shutil import copy
from datetime import datetime

from _utils import extract
from _utils.transform import sum_oscillations, oscillation_rate

In [2]:
date = datetime.today().strftime('%y%m%d')

In [3]:
from config import sourcedata_dir as source_dir
from config import derivatives_dir as derivs_dir

staged_dir = source_dir / '.staging'
homog_dir = derivs_dir / '02.homogenized'
gaze_dir = derivs_dir / 'gaze-data'

# Import dataframes

In [4]:
fpath = homog_dir / ('econdec-full_task-main_beh_' + date + '.csv')
beh_df = pd.read_csv(fpath)

change the path below - replace with new gaze data paths

In [5]:
trial_fpath = [f for f in staged_dir.glob('TrialReport.*.xls')][-1]
choice_fpath = [f for f in staged_dir.glob('Choice.*.xls')][-1]
outcome_fpath = [f for f in staged_dir.glob('StockOutcome.*.xls')][-1]

In [6]:
trial_df = pd.read_csv(trial_fpath, sep='\t')
choice_df = pd.read_csv(choice_fpath, sep='\t')
outcome_df = pd.read_csv(outcome_fpath, sep='\t')

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


# Rename columns

In [7]:
from config import new_columns

In [8]:
trial_df = trial_df.rename(columns = new_columns)
choice_df = choice_df.rename(columns = new_columns)
outcome_df = outcome_df.rename(columns = new_columns)

## Extract only the gaze data for main task series

The dataframe contains gaze data for practice trials, where `practice == 1` or `practice == 2`.

It also contains gaze data for the Memory task trials, where `Phase == Fract` or `Phase == Face`.

We'll slice all these rows out, leaving only the relevant main task trials (1-72).

In [9]:
trial_df = trial_df[(trial_df['Phase'] == 'Main Task')]
choice_df = choice_df[(choice_df['practice'] == 3) & (choice_df['Phase'] == 'Main Task')]
outcome_df = outcome_df[(outcome_df['practice'] == 3) & (outcome_df['Phase'] == 'Main Task')]

# Fix gaze trial numbers

They were offset by 4 by the practice trials (1-4) we removed. Re-setting to initialize at 1

In [10]:
choice_df['trial'].value_counts()

5    5266
3    5261
2    5261
6    5256
4    5256
1    5256
Name: trial, dtype: int64

# Drop irrelevant columns

There is a lot of data here, so we can implicitly select only the columns we need.

We'll rename them to be a little more clear next.

In [14]:
choice_df = choice_df[[
    'subjnum','trial',
    'IA_LABEL','IA_ID',
    'IA_FSA_COUNT_1','IA_FSA_COUNT_2',
    'IA_FSA_COUNT_24','IA_FSA_COUNT_25',
    'IA_FSA_COUNT_26','IA_FSA_COUNT_27',
]]

In [15]:
outcome_df = outcome_df[[
    'subjnum','trial',
    'IA_ID','IA_LABEL',
    'IA_DWELL_TIME'
]]

# Rename remaining columns

In the choice phase, we need to keep this entire matrix in order to calculate the oscillation rate. Each row represents an on-screen interest area for a given trial, denoted redundantly with `ia-id` and `ia-label`. There are six(6) relevant interest areas during the choice phase, so there are six(6) rows per trial.

The matrix tells us how many times a saccade started in one interest area and ended in another. Each of these is called a "fixation skip" (FSA), starting in that row's interest area, and ending in the interest area denoted by the `fsa-ia-` columns.

In [16]:
choice_df = choice_df.rename(columns={
    'IA_ID':'ia-id',
    'IA_LABEL':'ia-label',
    'IA_DWELL_TIME':'dwell-time',
    'IA_FSA_COUNT_1':'fsa-ia-01',
    'IA_FSA_COUNT_2':'fsa-ia-02',
    'IA_FSA_COUNT_24':'fsa-ia-24',
    'IA_FSA_COUNT_25':'fsa-ia-25',
    'IA_FSA_COUNT_26':'fsa-ia-26',
    'IA_FSA_COUNT_27':'fsa-ia-27'
})

In [17]:
outcome_dwell_time = outcome_df.rename(columns={
    'IA_ID':'ia-id',
    'IA_LABEL':'ia-label',
    'IA_DWELL_TIME':'dwell-time'
})

# Transform matrix into oscillation sum

We'll first define a function `sum_oscillations` to use with `df.apply()` to sum up the number of oscillations from any a given row's interest area to any of the interest areas on the other side of the screen.

In [18]:
choice_df[[
    'fsa-ia-01','fsa-ia-02','fsa-ia-24','fsa-ia-25','fsa-ia-26','fsa-ia-27'
]] = choice_df[[
    'fsa-ia-01','fsa-ia-02','fsa-ia-24','fsa-ia-25','fsa-ia-26','fsa-ia-27'
]].astype(int)

choice_df = choice_df.drop(columns='ia-label')

In [19]:
choice_df['oscillations'] = choice_df.apply(sum_oscillations,axis=1)

Next we use `df.groupby()` and `df.sum()` to collect and summate the oscillations into trialwise rows for merging into `beh_frame`

In [20]:
oscillations = choice_df.groupby(['subjnum','trial'])
oscillations = oscillations.sum()['oscillations']
oscillations = oscillations.reset_index()

#### Merge on trial number will not work until behavioral data comes in with trials numbered 1-72

In [23]:
beh_df = beh_df.merge(oscillations,'left')

ValueError: You are trying to merge on int64 and object columns. If you wish to proceed you should use pd.concat

# Extract outcome "Bubble" dwell time

All we need here is the `dwell-time` for one interest area in particular (`ia-id == 5`)

In [25]:
outcome_dwell_time = outcome_dwell_time[outcome_dwell_time['ia-id']==5]

In [26]:
outcome_dwell_time = outcome_dwell_time[['subjnum','trial','dwell-time']]

In [27]:
beh_df[['subjnum','trial']].dtypes

subjnum    int64
trial      int64
dtype: object

In [35]:
outcome_dwell_time.dtypes

subjnum       object
trial          int32
dwell-time     int64
dtype: object

In [34]:
outcome_dwell_time['subjnum'] = outcome_dwell_time['subjnum'].astype(str).str.split(pat = ".")

In [31]:
outcome_dwell_time['subjnum'] = outcome_dwell_time['subjnum'].astype(int)

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [66]:
outcome_dwell_time.dtypes

subjnum       object
trial          int32
dwell-time     int64
dtype: object

In [47]:
beh_df = beh_df.merge(outcome_dwell_time, on=['subjnum','trial'])

ValueError: You are trying to merge on int64 and object columns. If you wish to proceed you should use pd.concat

# Calculate oscillation rate

Divide the sum count of oscillations by the number of seconds spent on the choice phase

In [None]:
behav_df['osc-rate'] = behav_df.apply(oscillation_rate,axis=1)

# Output

In [14]:
gaze_dir = derivs_dir / '03.gaze-import'
if not Path.exists(gaze_dir): Path.mkdir(gaze_dir)

In [15]:
fpath = gaze_dir / ('econdec-full_task-all_eye_' + date + '.csv')

In [23]:
behav_df.to_csv(fpath,index=False)