In [1]:
import pandas as pd
import numpy as np

import os
from tqdm import tqdm

from stats import SaccadeLength, SaccadeAcceleration, FixationDuration, SaccadeVelocity, FixationVAD, RegressionCount, MicroSaccadeLength, MicroSaccadeVelocity, MicroSaccadeAcceleration
from measures import HurstExponent
from extractor import Extractor

import warnings
# NOTE(review): this silences every warning category for the rest of the
# kernel session, so it would also hide any deprecation or numerical warnings
# emitted while the features are computed. Consider narrowing the filter.
warnings.simplefilter('ignore')

In [2]:
# Root folder; load_dataset reads one sub-folder per participant id from it.
path_to_data = './gaze'
# Keep only the all-digit entry names and order them numerically.
participant_ids = sorted(
    int(entry) for entry in os.listdir(path_to_data) if entry.isdigit()
)
def load_dataset(participants=None, data_dir=None):
    """Load each participant's fixation workbook and stack them into one frame.

    Parameters
    ----------
    participants : iterable of int, optional
        Participant ids to load. When omitted, the notebook-level
        ``participant_ids`` list is used.
    data_dir : str, optional
        Root folder containing one sub-folder per participant. When omitted,
        the notebook-level ``path_to_data`` is used.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of ``<data_dir>/<id>/itog_fix_<id>.xlsx`` for
        every id, re-indexed from 0 (``ignore_index=True``).

    Raises
    ------
    ValueError
        If there is no participant to load. ``pd.concat`` would raise the same
        exception type on an empty list, but with a message that does not
        point at the data folder.
    """
    # Resolve the defaults at call time so a no-argument call keeps reading the
    # notebook globals exactly as before.
    ids = list(participant_ids if participants is None else participants)
    root = path_to_data if data_dir is None else data_dir

    if not ids:
        raise ValueError(f'no participant folders to load from {root!r}')

    frames = [
        pd.read_excel(f'{root}/{pid}/itog_fix_{pid}.xlsx')
        for pid in tqdm(ids)
    ]
    return pd.concat(frames, ignore_index=True)

# Print the discovered participant ids as a sanity check before loading.
print(participant_ids)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 42, 43, 44, 46, 47, 48, 49, 50, 51]


In [3]:
# Read every participant workbook into a single frame (the recorded run took
# roughly 12 s for 46 participants).
benchmark_data = load_dataset()
# Last expression of the cell: displays (rows, columns) of the combined frame.
benchmark_data.shape

100%|██████████| 46/46 [00:12<00:00,  3.77it/s]


(164618, 11)

In [4]:
# Column names in the fixation table; passed to Extractor in the cells below
# through its x=, y=, t=, duration= and dispersion= arguments.
x = 'norm_pos_x'
y = 'norm_pos_y'
t = 'start_timestamp'
dur = 'duration'
dis = 'dispersion'
# Thresholds shared by the MicroSaccade* features in the stats feature list.
# NOTE(review): units are not stated in this notebook — confirm in the stats module.
# NOTE(review): msa is labelled an amplitude threshold but is passed as
# min_dispersion below — confirm that mapping is intended.
msv = 4.7  # MS_VELOCITY_THRESHOLD; passed as max_velocity
msa = 1.2  # MS_AMPLITUDE_THRESHOLD; passed as min_dispersion

## 1. Stats

In [5]:
# Feature objects for the statistics benchmark; the same list is handed to
# Extractor in sections 1.1-1.3.
# `stats` presumably names the aggregates computed per feature — confirm in the stats module.
features = [
    SaccadeVelocity(stats=['min', 'kurtosis', 'max']),
    FixationDuration(stats=['median', 'skew', 'var']),
    FixationVAD(stats=['mean', 'median']),
    SaccadeLength(stats=['var', 'std']),
    RegressionCount(),
    SaccadeAcceleration(stats=['min', 'var', 'mean']),
    # The three micro-saccade features share the msa / msv thresholds.
    MicroSaccadeVelocity(stats=['min'], min_dispersion=msa, max_velocity=msv),
    MicroSaccadeAcceleration(stats=['var'], min_dispersion=msa, max_velocity=msv),
    MicroSaccadeLength(stats=['max'], min_dispersion=msa, max_velocity=msv),
    # NOTE(review): the meaning of n_iters and fill_strategy is defined in
    # measures.HurstExponent and is not visible from this notebook.
    HurstExponent(var='duration', n_iters=10, fill_strategy='last')
]

### 1.1 Grouping by texts

In [6]:
# Extractor keyed on the `tekst` column only (section title: grouping by texts).
# NOTE(review): pk is presumably the list of group-by key columns and
# return_df presumably requests a DataFrame result — confirm in extractor.Extractor.
extractor = Extractor(
    features=features,
    x=x, y=y, t=t,
    duration=dur,
    dispersion=dis, pk=['tekst'], return_df=True
)

# Time one fit_transform pass over the full dataset and show only the result's shape.
%time extractor.fit_transform(benchmark_data).shape

CPU times: user 26.7 s, sys: 213 ms, total: 27 s
Wall time: 26.9 s


(37, 18)

### 1.2 Grouping by participants

In [7]:
# Extractor keyed on the `Participant` column only (section title: grouping by
# participants). Everything except pk matches the per-text configuration.
# NOTE(review): pk is presumably the list of group-by key columns — confirm in extractor.Extractor.
extractor = Extractor(
    features=features,
    x=x, y=y, t=t,
    duration=dur,
    dispersion=dis, pk=['Participant'], return_df=True
)

# Time one fit_transform pass over the full dataset and show only the result's shape.
%time extractor.fit_transform(benchmark_data).shape

CPU times: user 1.08 s, sys: 144 ms, total: 1.22 s
Wall time: 1.06 s


(46, 18)

### 1.3 Grouping by texts and participants

In [8]:
# Extractor keyed on both `Participant` and `tekst` (section title: grouping by
# texts and participants). Everything except pk matches the earlier configurations.
# NOTE(review): pk is presumably the list of group-by key columns — confirm in extractor.Extractor.
extractor = Extractor(
    features=features,
    x=x, y=y, t=t,
    duration=dur,
    dispersion=dis, pk=['Participant', 'tekst'], return_df=True
)

# Time one fit_transform pass over the full dataset and show only the result's shape.
%time extractor.fit_transform(benchmark_data).shape

CPU times: user 26.7 s, sys: 1.82 s, total: 28.5 s
Wall time: 26.2 s


(1663, 18)

## 2. Distances

In [9]:
from scanpath_dist import EucDist, HauDist, DTWDist, MannanDist, EyeAnalysisDist, DFDist, TDEDist

### 2.1 Simple methods

In [11]:
# NOTE(review): this rebinds `features`, replacing the statistics feature list
# built in section 1. Re-running a section 1 extractor cell after this one
# would pick up the distance features instead.
# Only EucDist is active; the other imported distances are left commented out.
features = [
    EucDist(), 
    # HauDist(),
    # DTWDist(), 
    # MannanDist(),
    # EyeAnalysisDist(),
    # DFDist(),
]

# Same grouping key as section 1.3, with path_pk=['tekst'] added.
# NOTE(review): path_pk presumably selects the grouping used to build the
# reference scanpath that distances are measured against — confirm in extractor.Extractor.
extractor = Extractor(
    features=features,
    x=x, y=y, t=t,
    duration=dur,
    dispersion=dis, pk=['Participant', 'tekst'], path_pk=['tekst'], return_df=True
)

# Time one fit_transform pass over the full dataset and show only the result's shape.
%time extractor.fit_transform(benchmark_data).shape

CPU times: user 28.8 s, sys: 36.1 ms, total: 28.8 s
Wall time: 28.8 s


(1663, 1)