Use this notebook to calculate and save saccades and fixations from the eye tracking data. Subsequent scripts which
perform the analyses on saccades and fixations load the corresponding files from the data folder. The results come precalculated
with the dataset, but if you want, you can recalculate and overwrite them here.

In [None]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from multiprocessing import Pool
from tqdm.auto import tqdm
from vr2f.eyetracking import et_plot_gaze_saccades, et_utils, ms_toolbox
from vr2f.staticinfo import PATHS, CONSTANTS


In [2]:
%load_ext autoreload
%autoreload 2

In [None]:
# Project-wide path and constant containers (imported from vr2f.staticinfo). The cells below read
# paths.DATA_ET_PREPROC and paths.DATA_ET, and constants.SFREQ_ET, ET_SACC_VFAC and ET_SACC_MINDUR.
paths = PATHS()
constants = CONSTANTS()

## Calculate saccades and fixations
In the following, we use algorithms from the Microsaccade Toolbox (Engbert & Kliegl, 2003) to detect saccades (and 
fixations) in our data and write out files with this information.  
Running these calculations from the notebook can take a while, so I recommend reading in the saved files and skipping this 
step. You can also rerun them once and overwrite the saved files. 

In [None]:
# Set to True to rerun the (slow) saccade/fixation detection below; when False, the calculation cell is skipped.
RECALCULATE_SACC_AND_FIX_DATA = False
# Set to True to save the recalculated results as CSV files in paths.DATA_ET / "02_gaze"
# (gaze_all.csv, saccades_all.csv, fixations_all.csv). Only has an effect when recalculating.
WRITE_TO_DISK = False

In [None]:

def get_sacc_fix_per_sub(args):
    """Detect saccades and label every gaze sample for all trials of one subject.

    The function takes a single tuple so that it can be dispatched with
    ``multiprocessing.Pool.map``.

    Parameters
    ----------
    args : tuple
        ``(df_all, trial_range, sub_id)`` where

        * ``df_all`` is a gaze DataFrame with a ``sub_id`` column (rows of other
          subjects are ignored),
        * ``trial_range`` is an iterable of trial numbers to process,
        * ``sub_id`` is the ID of the subject to process.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_gaze_subject, sacs)``:

        * ``df_gaze_subject`` holds the per-trial gaze samples with the added
          columns ``gaze_state`` (``"fix"`` or ``"sacc"``), ``idx_fix`` and
          ``idx_sacc`` (``-99`` for samples that are not part of a saccade).
        * ``sacs`` holds the saccade table returned by ``ms_toolbox.microsacc``
          for each trial, with ``trial_num`` and ``sub_id`` columns added.

        Both frames are empty (apart from the added columns) if no trial
        contained data.
    """
    df_all, trial_range, sub_id = args
    print(f"Processing {sub_id}")
    sfreq = constants.SFREQ_ET
    vfac = constants.ET_SACC_VFAC
    mindur = constants.ET_SACC_MINDUR

    df_sub = df_all.query("sub_id == @sub_id").copy()

    # Collect per-trial frames and concatenate once at the end; concatenating
    # inside the loop re-copies the accumulated data on every iteration.
    gaze_frames = []
    sacc_frames = []
    for trial_idx in tqdm(trial_range):
        df_st = et_plot_gaze_saccades.get_data_sub_trialnum(df_sub, sub_id, trial_idx)
        if df_st.empty:
            print(f"!!! Warning: No data for {sub_id} and trial {trial_idx}. Skipping this trial.")
            continue
        data = df_st.loc[:, ["theta", "phi"]].to_numpy()
        # The second return value of microsacc is not needed here.
        sac, _ = ms_toolbox.microsacc(data, srate=sfreq, vfac=vfac, mindur=mindur)
        sac["trial_num"] = trial_idx
        sacc_frames.append(sac)

        # Every sample starts out as part of fixation 0; saccade spans are overwritten below.
        df_st["gaze_state"] = "fix"
        df_st["idx_fix"] = 0
        for i, row in sac.iterrows():
            onset = row["idx_onset"]
            offset = row["idx_offset"]
            # NOTE(review): .loc slices by label and includes both end points, so this marks
            # samples onset..offset+1. This assumes df_st carries a 0-based index matching the
            # sample indices reported by microsacc -- confirm against get_data_sub_trialnum.
            df_st.loc[onset:offset+1, ["gaze_state"]] = "sacc"
            df_st.loc[onset:offset+1, ["idx_sacc"]] = i
            # Everything from the end of this saccade onwards belongs to the next fixation.
            df_st.loc[offset:, "idx_fix"] = i+1
        gaze_frames.append(df_st)

    # pd.concat raises on an empty list, so fall back to empty frames.
    sacs = pd.concat(sacc_frames, ignore_index=True) if sacc_frames else pd.DataFrame()
    sacs["sub_id"] = sub_id

    df_gaze_subject = pd.concat(gaze_frames, ignore_index=True) if gaze_frames else pd.DataFrame()
    # "idx_sacc" is only created when at least one saccade was found; make sure it exists
    # so that subjects without any saccade (or without data) do not raise a KeyError.
    if "idx_sacc" not in df_gaze_subject.columns:
        df_gaze_subject["idx_sacc"] = np.nan
    # Samples outside of saccades get the sentinel -99 so the column can be stored as int.
    df_gaze_subject["idx_sacc"] = df_gaze_subject["idx_sacc"].fillna(-99).astype(int)
    return (df_gaze_subject, sacs)


if RECALCULATE_SACC_AND_FIX_DATA:
    # ---------------------------------------------------------------------------------------
    # 1) Read in the preprocessed gaze data of all subjects.
    # ---------------------------------------------------------------------------------------
    pattern = "withoutblinks-preproc.csv"

    # The subject ID is the part of the file name before the first "-".
    # np.unique de-duplicates AND sorts, so no additional sorting is required.
    sub_list_str_et = np.unique(
        [f.split("-")[0] for f in os.listdir(paths.DATA_ET_PREPROC) if pattern in f]
    )
    if len(sub_list_str_et) == 0:
        # Fail early with a clear message instead of an opaque error from pd.concat / Pool.
        raise FileNotFoundError(
            f"No files containing '{pattern}' found in {paths.DATA_ET_PREPROC}."
        )

    data_preproc = []
    for sub_id in sub_list_str_et:
        fname = Path(paths.DATA_ET_PREPROC, f"{sub_id}-ET-{pattern}")
        df_clean = pd.read_csv(fname, sep=",")
        df_clean["sub_id"] = sub_id
        data_preproc.append(df_clean)

    df_all = pd.concat(data_preproc, ignore_index=True)

    # ---------------------------------------------------------------------------------------
    # 2) Detect saccades and fixations, one worker task per subject.
    # ---------------------------------------------------------------------------------------
    # Choose subjects
    sub_id_selection = sub_list_str_et
    # One process per subject, but never more than there are CPUs.
    num_processes = min(len(sub_id_selection), os.cpu_count() or 1)

    # Prepare input
    trial_range = range(1, 745) # all trials

    # Hand each worker only the rows of its own subject (original index preserved), so that
    # the full data set is not pickled once per subject. The worker filters by sub_id again,
    # which yields the same rows.
    args_list = [
        (df_all[df_all["sub_id"] == sub_id], trial_range, sub_id)
        for sub_id in sub_id_selection
    ]

    # Run it on pool; the context manager also cleans up the workers if a task raises.
    with Pool(processes=num_processes) as pool:
        results = pool.map(get_sacc_fix_per_sub, args_list)

    # ---------------------------------------------------------------------------------------
    # 3) Combine the per-subject results and (optionally) write them to disk.
    # ---------------------------------------------------------------------------------------
    fpath = paths.DATA_ET / "02_gaze"
    if WRITE_TO_DISK:
        fpath.mkdir(exist_ok=True, parents=True)

    # Gaze samples of all subjects
    df_gaze = pd.concat([res[0] for res in results], ignore_index=True)

    if WRITE_TO_DISK:
        df_gaze.to_csv(fpath / "gaze_all.csv", index=False)

    # Saccades of all subjects; first we add columns with the onset and offset time of
    # each saccade, taken from the first and last gaze sample labelled with that saccade.
    saccades = pd.concat([res[1] for res in results], ignore_index=True)
    sacc_times = (
        df_gaze
        .query("gaze_state == 'sacc'")
        .groupby(["sub_id", "trial_num", "idx_sacc"], as_index=False)
        .agg(times_onset=("times", "first"), times_offset=("times", "last"))
    )

    # Number the saccades within each subject/trial so they can be matched with the
    # idx_sacc labels of the gaze samples. The (default) inner merge keeps only saccades
    # that have matching gaze samples.
    saccades["idx_sacc"] = saccades.groupby(["sub_id", "trial_num"]).cumcount()
    saccades = saccades.merge(sacc_times, on=["sub_id", "trial_num", "idx_sacc"])

    saccades["amp_tot"] = np.sqrt(saccades["amp_x"]**2 + saccades["amp_y"]**2)
    saccades["angle"] = saccades.apply(
        lambda x: et_utils.angle_from_spherical(x.vec_x, x.vec_y), axis=1
    )

    if WRITE_TO_DISK:
        saccades.to_csv(fpath / "saccades_all.csv", index=False)

    # Fixations: one row per subject/trial/fixation with onset/offset time and the
    # mean/min/max of both gaze angles.
    fixations = (
        df_gaze
        .query("gaze_state == 'fix'")
        .groupby(["sub_id", "trial_num", "gaze_state", "idx_fix"], as_index=False)
        .agg(
            times_onset=("times", "first"),
            times_offset=("times", "last"),
            center_theta=("theta", "mean"),
            min_theta=("theta", "min"),
            max_theta=("theta", "max"),
            center_phi=("phi", "mean"),
            min_phi=("phi", "min"),
            max_phi=("phi", "max"),
        )
        # Adds a running "index" column that ends up in fixations_all.csv; kept so that
        # the layout of the written file does not change.
        .reset_index()
    )
    fixations["duration"] = fixations["times_offset"] - fixations["times_onset"]
    fixations["spread_theta"] = fixations["max_theta"] - fixations["min_theta"]
    fixations["spread_phi"] = fixations["max_phi"] - fixations["min_phi"]

    if WRITE_TO_DISK:
        fixations.to_csv(fpath / "fixations_all.csv", index=False)