# Import and extract gaze data
`3.gaze-data_import`

Import gaze data from the eye-tracking software, and incorporate trial outcomes into homogenized dataset

In [None]:
import pandas as pd

from pathlib import Path
from shutil import copy
from datetime import datetime

from _utils import extract
from _utils.transform import sum_oscillations, oscillation_rate

In [None]:
# Today's date as a YYMMDD stamp; used to build the dated input/output filenames.
date = f"{datetime.today():%y%m%d}"

In [None]:
from config import sourcedata_dir as source_dir
from config import derivatives_dir as derivs_dir

# Working directories derived from the configured source/derivatives roots.
staged_dir = source_dir / '.staging'        # eye-tracker report exports are read from here
homog_dir = derivs_dir / '02.homogenized'   # homogenized behavioural CSV is read from here
# Output directory for this notebook. Previously this pointed at 'gaze-data',
# which was never used and conflicted with the '03.gaze-import' directory the
# output cell actually writes to; keep a single consistent definition.
gaze_dir = derivs_dir / '03.gaze-import'

# Import dataframes

In [None]:
# Load the homogenized behavioural data stamped with today's date.
fpath = homog_dir.joinpath(f'econdec-full_task-main_beh_{date}.csv')
beh_df = pd.read_csv(fpath)

**Note:** update the glob patterns below if the new gaze data files are stored under different paths or names.

In [None]:
# Pick the last-sorting export of each report type from the staging directory.
# Path.glob() yields matches in arbitrary (filesystem-dependent) order, so the
# matches are sorted before taking the final one; otherwise which file is
# selected is not reproducible. An empty match still raises IndexError.
# NOTE(review): assumes the variable part of the filename sorts
# chronologically (e.g. a date stamp) — confirm against the export naming.
trial_fpath = sorted(staged_dir.glob('TrialReport.*.xls'))[-1]
choice_fpath = sorted(staged_dir.glob('Choice.*.xls'))[-1]
outcome_fpath = sorted(staged_dir.glob('StockOutcome.*.xls'))[-1]

In [None]:
# The reports carry an .xls suffix but are parsed here as tab-separated text.
trial_df, choice_df, outcome_df = (
    pd.read_csv(report_path, sep='\t')
    for report_path in (trial_fpath, choice_fpath, outcome_fpath)
)

# Rename columns

In [None]:
from config import new_columns

In [None]:
# Apply the shared column-name mapping from the project config to all three reports.
trial_df, choice_df, outcome_df = (
    report.rename(columns=new_columns)
    for report in (trial_df, choice_df, outcome_df)
)

## Extract only the gaze data for main task series

The dataframe contains gaze data for practice trials, where `practice == 1` or `practice == 2`.

It also contains gaze data for the Memory task trials, where `Phase == Fract` or `Phase == Face`.

We'll slice all these rows out, leaving only the relevant main task trials (1-72).

In [None]:
# Keep main-task rows only. The choice and outcome reports are additionally
# restricted to practice == 3; the trial report is filtered on Phase alone.
trial_df = trial_df.loc[trial_df['Phase'].eq('Main Task')]
choice_df = choice_df.loc[
    choice_df['practice'].eq(3) & choice_df['Phase'].eq('Main Task')
]
outcome_df = outcome_df.loc[
    outcome_df['practice'].eq(3) & outcome_df['Phase'].eq('Main Task')
]

# Fix gaze trial numbers

The trial numbers were offset by 4 by the practice trials (1-4) we removed, so they need to be re-set to start at 1. The cell below only inspects the current trial numbering.

In [None]:
# Display how many rows each trial number has (inspection only; nothing is modified).
choice_df.loc[:, 'trial'].value_counts()

# Drop irrelevant columns

There is a lot of data here, so we explicitly select only the columns we need.

We'll rename them to be a little more clear next.

In [None]:
# Keep the trial keys, the interest-area identifiers and the six FSA count
# columns. An explicit copy is taken because later cells assign columns on
# this frame in place; without it the frame is a slice of the filtered report
# and pandas may emit SettingWithCopyWarning on those assignments.
choice_df = choice_df[[
    'subjnum', 'block', 'trial',
    'IA_LABEL', 'IA_ID',
    'IA_FSA_COUNT_1', 'IA_FSA_COUNT_2',
    'IA_FSA_COUNT_24', 'IA_FSA_COUNT_25',
    'IA_FSA_COUNT_26', 'IA_FSA_COUNT_27',
]].copy()

In [None]:
# Keep only the trial keys, the interest-area identifiers and the dwell time.
outcome_df = outcome_df.loc[:, [
    'subjnum', 'block', 'trial',
    'IA_ID', 'IA_LABEL',
    'IA_DWELL_TIME',
]]

# Rename remaining columns

In the choice phase, we need to keep this entire matrix in order to calculate the oscillation rate. Each row represents an on-screen interest area for a given trial, denoted redundantly with `ia-id` and `ia-label`. There are six(6) relevant interest areas during the choice phase, so there are six(6) rows per trial.

The matrix tells us how many times a saccade started in one interest area and ended in another. Each of these is called a "fixation skip" (FSA), starting in that row's interest area, and ending in the interest area denoted by the `fsa-ia-` columns.

In [None]:
# Rename the interest-area columns to short lowercase names.
choice_df = choice_df.rename(columns={
    'IA_ID': 'ia-id',
    'IA_LABEL': 'ia-label',
    'IA_DWELL_TIME': 'dwell-time',
    # IA_FSA_COUNT_<n> -> fsa-ia-<nn> (interest-area id zero-padded to two digits)
    **{
        f'IA_FSA_COUNT_{ia}': f'fsa-ia-{ia:02d}'
        for ia in (1, 2, 24, 25, 26, 27)
    },
})

In [None]:
# Same short names for the outcome-phase report, stored under a new variable.
outcome_dwell_time = outcome_df.rename(
    {
        'IA_ID': 'ia-id',
        'IA_LABEL': 'ia-label',
        'IA_DWELL_TIME': 'dwell-time',
    },
    axis='columns',
)

# Transform matrix into oscillation sum

We'll use the imported `sum_oscillations` function with `df.apply()` to sum up the number of oscillations from a given row's interest area to any of the interest areas on the other side of the screen.

In [None]:
# Cast the six FSA count columns to int, then drop the text label column
# (each row is still identified by 'ia-id').
choice_df = (
    choice_df
    .astype(dict.fromkeys(
        ['fsa-ia-01', 'fsa-ia-02', 'fsa-ia-24',
         'fsa-ia-25', 'fsa-ia-26', 'fsa-ia-27'],
        int,
    ))
    .drop(columns='ia-label')
)

In [None]:
# Row-wise oscillation count, computed by the imported sum_oscillations helper.
choice_df = choice_df.assign(
    oscillations=choice_df.apply(sum_oscillations, axis='columns')
)

Next we use `df.groupby()` and `df.sum()` to collect and sum the oscillations into trialwise rows for merging into `beh_df`

In [None]:
# Collapse the per-interest-area rows into one row per (subject, block, trial)
# by summing every remaining column; the keys are restored as ordinary columns.
oscillations = (
    choice_df
    .groupby(['subjnum', 'block', 'trial'])
    .sum()
    .reset_index()
)

# Extract outcome "Bubble" dwell time

All we need here is the `dwell-time` for one interest area in particular (`ia-id == 5`)

In [None]:
# Keep only the rows for interest area 5.
outcome_dwell_time = outcome_dwell_time.loc[outcome_dwell_time['ia-id'].eq(5)]

In [None]:
# Reduce to the merge keys plus the dwell time itself.
outcome_dwell_time = outcome_dwell_time.loc[
    :, ['subjnum', 'block', 'trial', 'dwell-time']
]

## Merge on trial number alone will not work

Unless trials are named absolutely and sequentially (i.e., 1-72)

Instead we can merge on the combination of `['subjnum', 'block', 'trial']`, using a left-join so we retain subjects who have no gaze data.

In [None]:
# Left-join the trialwise oscillation sums, then the outcome dwell times, onto
# the behavioural data so every behavioural row is kept.
output_df = pd.merge(
    pd.merge(
        beh_df,
        oscillations,
        how='left',
        on=['subjnum', 'block', 'trial'],
    ),
    outcome_dwell_time,
    how='left',
    on=['subjnum', 'block', 'trial'],
)

# Calculate oscillation rate

Divide the sum count of oscillations by the number of seconds spent on the choice phase

In [None]:
# Oscillations per unit of choice response time (element-wise division).
output_df['osc-rate'] = output_df['oscillations'].div(output_df['choicert'])

In [None]:
# Spot-check nine randomly chosen rows of the merged frame (display only).
output_df.sample(n=9)

# Output

In [None]:
# Directory that receives this notebook's output CSV.
gaze_dir = derivs_dir / '03.gaze-import'
# Create it if needed. exist_ok=True replaces the separate exists() check
# (no check-then-create race), and parents=True also creates a missing
# derivatives directory instead of raising FileNotFoundError.
gaze_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Dated output filename inside the gaze-import directory.
fpath = gaze_dir.joinpath(f'econdec-full_task-all_eye_{date}.csv')

In [None]:
# Write the merged frame to CSV without the positional index column.
output_df.to_csv(path_or_buf=fpath, index=False)