# Eye tracking: preprocessing

This script imports the raw eye tracking (ET) data from the `Subjects` folder
and runs a few preprocessing steps on them:

1. Transform the gaze vectors into spherical coordinates (`theta`: horizontal 
gaze angle, `phi`: vertical)  
2. Calculate the time relative to the stimulus onset event.  
3. Correct for the temporal offset between the actual eye movement and the time 
the eye tracker provides the accordinmg sample (using cross-correlation with the 
instantaneous EOG signal)
4. Subtract the mean baseline in a window of `200 ms` before stimulus onset from
each trial (separately for `theta` and `phi`).
5. Optional (see parameter `blinks_interpolate` at the beginning of the notebook): identify blinks in the data and replace them via linear interpolation. 
6. Save the files as pandas data frames.  
  
⚠️ We only have ET data for a subset of the participants. 

In [1]:
%load_ext autoreload
%autoreload 2

import os
from multiprocessing import Pool, freeze_support
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from vr2f.eyetracking import et_preprocessing, lag_calculator_et_vs_eog
from vr2f.staticinfo import CONSTANTS, PATHS


In [2]:
def run_preprocessing(sub_id):  # noqa: PLR0915
    """Run the ET preprocessing pipeline for a single subject."""
    # Set to True if you want to interpolate blinks
    blinks_interpolate = True

    paths = PATHS()
    constants = CONSTANTS()

    cond_dict = {
        "viewcond": {1: "mono", 2: "stereo"},
        "emotion": {1: "neutral", 2: "happy", 3: "angry", 4: "surprised"},
        "avatar_id": {1: "Woman_01", 2: "Woman_04", 3: "Woman_08"},
    }

    print(f"Processing {sub_id}...\n")

    markerlog_stimonsets = et_preprocessing.get_stimonset_df(sub_id)
    if markerlog_stimonsets is None:
        print(f"    Skipping {sub_id}.")
        return 0
    markerlog_stimonsets_np = markerlog_stimonsets["timestamp_corrected"].to_numpy()

    et_files, path_et_files = et_preprocessing.get_et_rawfiles(sub_id)
    if et_files is None:
        print(f"    Skipping {sub_id}.")
        return 0

    df_all = pd.DataFrame()
    for f in tqdm(et_files):
        t_num = int(f.split(".csv")[0][-3:])
        fname = Path(path_et_files, f)
        df = pd.read_csv(fname, sep=",")

        df = df.reset_index(drop=False)

        # skip this file if it is empty apart from the header
        if df.shape[0] == 0:
            continue
        # add 3 columns theta, phi, r to df
        df[["theta", "phi", "r"]] = et_preprocessing.cart2sph_custom(
            df["direction_x_local"], df["direction_y_local"], df["direction_z_local"]
        )

        # calculate relative time to stimulus onset:
        times = df["time"] - markerlog_stimonsets_np[t_num - 1]
        # find the index of the stimulus onset
        idx_stimonset = np.argmin(np.abs(times))

        df["times"] = (df["timestamp_lsl"] - df.loc[idx_stimonset, "timestamp_lsl"])

        # crop to [-0.5; 1.5]s window; assuming a stable srate of 120Hz
        srate = 120
        df = df.iloc[np.max([idx_stimonset - int(0.5 * srate), 0]) : idx_stimonset + int(1.5 * srate) + 1, :]

        # add info about the trial
        df["trial_num"] = t_num
        df["marker"] = markerlog_stimonsets["annotation"].iloc[t_num - 1]
        # split marker on whitespace and keep only last part
        df["marker"] = df["marker"].str.split().str[-1]
        # marker is a 3digit int; split it into its single digits and put each digit into a separate column
        df["viewcond"] = df["marker"].str[0].astype(int)
        df["avatar_id"] = df["marker"].str[1].astype(int)
        df["emotion"] = df["marker"].str[2].astype(int)
        df["emotion"] = df["emotion"].map(cond_dict["emotion"])
        df["avatar_id"] = df["avatar_id"].map(cond_dict["avatar_id"])
        df["viewcond"] = df["viewcond"].map(cond_dict["viewcond"])

        df_all = pd.concat([df_all, df], ignore_index=True)

    if df.shape[0] == 0:
        print(f"    Skipping {sub_id} because there is no eye tracking data.")
        return 0

    # adaptively correct for the lag between ET and EOG
    lag_calculator = lag_calculator_et_vs_eog.LagCalculatorEyetrackingVsEog()
    lag = lag_calculator.get_et_vs_eog_lag(sub_id, df_all, plot_it=True, write_csv=False)
    df_all["times"] = df_all["times"] - lag
    print(f"    Correcting for lag of {lag}ms for {sub_id}.")

    if blinks_interpolate:
        df_all = df_all.groupby(["trial_num"]).apply(et_preprocessing.interpolate_blinks)

    # calculate a baseline for each trial
    baselines = (
        df_all.query("times < 0 & times > -0.2").loc[:, ["theta", "phi", "trial_num"]].groupby(["trial_num"]).mean()
    )

    # subtract the baseline from each trial
    df_all = df_all.merge(baselines, on="trial_num", suffixes=("", "_baseline"))
    df_all["theta"] = df_all["theta"] - df_all["theta_baseline"]
    df_all["phi"] = df_all["phi"] - df_all["phi_baseline"]

    df_all = (df_all.groupby(["trial_num"])
                .apply(et_preprocessing.set_to_fixed_sample_length,
                       sfreq=constants.SFREQ_ET, dur_pre=0.4, dur_post=1.1))

    # save to csv
    path_out = Path(paths.DATA_ET_PREPROC)
    if blinks_interpolate:
        fname = Path(path_out, f"{sub_id}-ET-withoutblinks-preproc.csv")
    else:
        fname = Path(path_out, f"{sub_id}-ET-withblinks-preproc.csv")
    df_all.to_csv(fname, sep=",", index=False)

    n_trials = df_all["trial_num"].nunique()
    print(f"    {sub_id}: {n_trials} trials")
    return 1

In [5]:

paths = PATHS()
sub_list_str = os.listdir(paths.DATA_SUBJECTS)
sub_list_str = sorted(sub_list_str)
n_valid_subs = 0

pool_obj = Pool()
res = pool_obj.map(run_preprocessing, sub_list_str)
n_valid_subs = sum(res)
print(f"\nN of subjects with valid ET data: {n_valid_subs}.")

Processing VR2FEM_S01...
Processing VR2FEM_S02...

Processing VR2FEM_S03...
Processing VR2FEM_S04...
Processing VR2FEM_S05...
Processing VR2FEM_S08...
Processing VR2FEM_S10...
Processing VR2FEM_S12...

Processing VR2FEM_S14...
Processing VR2FEM_S15...

Processing VR2FEM_S13...
Processing VR2FEM_S07...
Processing VR2FEM_S06...
Processing VR2FEM_S11...

Processing VR2FEM_S21...

Processing VR2FEM_S17...
Processing VR2FEM_S23...
Processing VR2FEM_S16...



Processing VR2FEM_S18...
Processing VR2FEM_S19...
Processing VR2FEM_S20...


Processing VR2FEM_S24...
Processing VR2FEM_S26...

Processing VR2FEM_S27...
Processing VR2FEM_S25...

Processing VR2FEM_S22...


Processing VR2FEM_S29...
Processing VR2FEM_S30...
Processing VR2FEM_S33...

Processing VR2FEM_S31...


Processing VR2FEM_S32...
Processing VR2FEM_S28...

Processing VR2FEM_S34...















    No markerlog for VR2FEM_S04
    Skipping VR2FEM_S04.
Skipping VR2FEM_S30 because there are no eye tracking files.
    Skipping VR2FEM_S30.

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/743 [00:00<?, ?it/s]

  0%|          | 0/729 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/554 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

  0%|          | 0/744 [00:00<?, ?it/s]

    Skipping VR2FEM_S06 because there is no eye tracking data.
    Skipping VR2FEM_S23 because there is no eye tracking data.
    Skipping VR2FEM_S34 because there is no eye tracking data.
    Skipping VR2FEM_S17 because there is no eye tracking data.
    Skipping VR2FEM_S22 because there is no eye tracking data.
    Skipping VR2FEM_S27 because there is no eye tracking data.
    Skipping VR2FEM_S20 because there is no eye tracking data.
    Skipping VR2FEM_S33 because there is no eye tracking data.
    Skipping VR2FEM_S26 because there is no eye tracking data.
    Skipping VR2FEM_S14 because there is no eye tracking data.
    Skipping VR2FEM_S29 because there is no eye tracking data.
    Skipping VR2FEM_S32 because there is no eye tracking data.
    Skipping VR2FEM_S28 because there is no eye tracking data.
    Skipping VR2FEM_S18 because there is no eye tracking data.


  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/717 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/717 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

  0%|          | 0/720 [00:00<?, ?it/s]

#### VR2FEM_S03 ####
Top correlation for vertical: 137 at 5.0
Top correlation for horizontal: 139 at 7.0
    Correcting for lag of 0.05ms for VR2FEM_S03.
#### VR2FEM_S31 ####
Top correlation for vertical: 139 at 7.0
Top correlation for horizontal: 140 at 8.0
    Correcting for lag of 0.0625ms for VR2FEM_S31.
#### VR2FEM_S10 ####
Top correlation for vertical: 139 at 7.0
Top correlation for horizontal: 139 at 7.0
    Correcting for lag of 0.058333333333333334ms for VR2FEM_S10.
#### VR2FEM_S11 ####
Top correlation for vertical: 140 at 8.0
Top correlation for horizontal: 140 at 8.0
    Correcting for lag of 0.06666666666666667ms for VR2FEM_S11.
#### VR2FEM_S01 ####
Top correlation for vertical: 139 at 7.0
Top correlation for horizontal: 139 at 7.0
    Correcting for lag of 0.058333333333333334ms for VR2FEM_S01.
    VR2FEM_S03: 744 trials
#### VR2FEM_S12 ####
Top correlation for vertical: 139 at 7.0
Top correlation for horizontal: 140 at 8.0
    VR2FEM_S31: 743 trials
    Correcting for lag