In [2]:
import pandas as pd
import numpy as np

from stats import SaccadeLength, SaccadeAcceleration, FixationDuration, SaccadeVelocity, FixationVAD, RegressionCount, MicroSaccadeLength, MicroSaccadeVelocity, MicroSaccadeAcceleration, RegressionLength, RegressionVelocity, RegressionAcceleration
from extractor import Extractor

In [3]:
data = pd.concat([pd.read_excel('itog_fix_1.xlsx'), pd.read_excel('itog_fix_2.xlsx')], axis=0)

In [4]:
x = 'norm_pos_x'
y = 'norm_pos_y'
t = 'start_timestamp'
dur = 'duration'
dis = 'dispersion'
msv = 4.7  # MS_VELOCITY_THRESHOLD
msa = 1.2  # MS_AMPLITUDE_THRESHOLD

features = [
    SaccadeVelocity(stats=['min', 'kurtosis', 'max']),
    FixationDuration(stats=['median', 'skew', 'var']),
    FixationVAD(stats=['mean', 'median']),
    SaccadeLength(stats=['var', 'std', 'min']),
    RegressionCount(),
    SaccadeAcceleration(stats=['min', 'var', 'mean']),
    MicroSaccadeVelocity(stats=['min'], min_dispersion=msa, max_velocity=msv),
    MicroSaccadeAcceleration(stats=['var'], min_dispersion=msa, max_velocity=msv),
    MicroSaccadeLength(stats=['max'], min_dispersion=msa, max_velocity=msv),
    RegressionLength(stats=['var', 'std', 'min']),
    RegressionVelocity(stats=['min', 'kurtosis', 'max']),
    RegressionAcceleration(stats=['min', 'var', 'mean'])
]
sac_vel = SaccadeVelocity(stats=['min', 'kurtosis', 'max'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
fix_dur = FixationDuration(stats=['median', 'skew', 'var'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
reg_count = RegressionCount(x=x, y=y, t=t, pk=['tekst'])
fix_vad = FixationVAD(stats=['mean', 'median'], x=x, y=y, t=t, duration=dur,  dispersion=dis, pk=['tekst'])
sac_len = SaccadeLength(stats=['var', 'std', 'min'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
sac_acc = SaccadeAcceleration(stats=['min', 'var', 'mean', 'max'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
microsac_vel = MicroSaccadeVelocity(stats=['min'], x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'], min_dispersion=msa, max_velocity=msv)
microsac_len = MicroSaccadeLength(stats=['max'], x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'], min_dispersion=msa, max_velocity=msv)
microsac_acc = MicroSaccadeAcceleration(stats=['max'], x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'], min_dispersion=msa, max_velocity=msv)
reg_len = RegressionLength(stats=['var', 'std', 'min'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
reg_vel = RegressionVelocity(stats=['min', 'kurtosis', 'max'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
reg_acc = RegressionAcceleration(stats=['min', 'var', 'mean', 'max'], x=x, y=y, t=t, duration=dur, pk=['tekst'])

extractor = Extractor(features=features, x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'],
                      return_df=True)

In [5]:
sac_len.transform(data)

Unnamed: 0,sac_len_var_1,sac_len_std_1,sac_len_min_1,sac_len_var_2,sac_len_std_2,sac_len_min_2,sac_len_var_3,sac_len_std_3,sac_len_min_3,sac_len_var_4,...,sac_len_min_34,sac_len_var_35,sac_len_std_35,sac_len_min_35,sac_len_var_36,sac_len_std_36,sac_len_min_36,sac_len_var_37,sac_len_std_37,sac_len_min_37
0,0.0086,0.092735,0.00265,0.006856,0.0828,0.003741,0.008787,0.093741,0.003351,0.00875,...,0.00178,0.018313,0.135327,0.00217,0.01331,0.115368,0.002196,0.013424,0.115863,0.002151


In [6]:
reg_len.transform(data)

Unnamed: 0,reg_len_var_1,reg_len_std_1,reg_len_min_1,reg_len_var_2,reg_len_std_2,reg_len_min_2,reg_len_var_3,reg_len_std_3,reg_len_min_3,reg_len_var_4,...,reg_len_min_34,reg_len_var_35,reg_len_std_35,reg_len_min_35,reg_len_var_36,reg_len_std_36,reg_len_min_36,reg_len_var_37,reg_len_std_37,reg_len_min_37
0,0.010025,0.100124,0.00265,0.008756,0.093575,0.003741,0.011025,0.104999,0.003351,0.010228,...,0.00178,0.026438,0.162596,0.00217,0.021943,0.14813,0.004319,0.022163,0.148871,0.002151


In [7]:
sac_acc.transform(data)

Unnamed: 0,sac_acc_min_1,sac_acc_var_1,sac_acc_mean_1,sac_acc_max_1,sac_acc_min_2,sac_acc_var_2,sac_acc_mean_2,sac_acc_max_2,sac_acc_min_3,sac_acc_var_3,...,sac_acc_mean_35,sac_acc_max_35,sac_acc_min_36,sac_acc_var_36,sac_acc_mean_36,sac_acc_max_36,sac_acc_min_37,sac_acc_var_37,sac_acc_mean_37,sac_acc_max_37
0,7.437532e-13,7096.232068,92.712374,567.604993,1.364786e-12,6735.116723,82.582251,513.78328,2.86006e-12,4745.36048,...,110.523075,1419.243216,2.508151e-12,12305.666053,112.434921,691.417759,2.072115e-12,7886.442632,90.731849,696.469091


In [8]:
reg_acc.transform(data)

Unnamed: 0,reg_acc_min_1,reg_acc_var_1,reg_acc_mean_1,reg_acc_max_1,reg_acc_min_2,reg_acc_var_2,reg_acc_mean_2,reg_acc_max_2,reg_acc_min_3,reg_acc_var_3,...,reg_acc_mean_35,reg_acc_max_35,reg_acc_min_36,reg_acc_var_36,reg_acc_mean_36,reg_acc_max_36,reg_acc_min_37,reg_acc_var_37,reg_acc_mean_37,reg_acc_max_37
0,0.053597,258379.220618,192.503842,6138.680073,1.364786e-12,334387.714537,179.512883,5993.707155,2.86006e-12,13042.797713,...,112.432628,664.616126,0.330045,9888.632278,106.202628,580.551488,2.072115e-12,9591.713138,96.316443,696.469091


In [9]:
sac_vel.transform(data)

Unnamed: 0,sac_vel_min_1,sac_vel_kurtosis_1,sac_vel_max_1,sac_vel_min_2,sac_vel_kurtosis_2,sac_vel_max_2,sac_vel_min_3,sac_vel_kurtosis_3,sac_vel_max_3,sac_vel_min_4,...,sac_vel_max_34,sac_vel_min_35,sac_vel_kurtosis_35,sac_vel_max_35,sac_vel_min_36,sac_vel_kurtosis_36,sac_vel_max_36,sac_vel_min_37,sac_vel_kurtosis_37,sac_vel_max_37
0,-4.70974e-07,6.470488,13.717103,-8.64203e-07,1.763947,8.626671,-2e-06,3.589073,10.717298,-2e-06,...,6.728558,-9.748183e-07,37.115046,30.362497,-2e-06,30.10733,22.777222,-1e-06,6.351163,12.730611


In [10]:
reg_vel.transform(data)

Unnamed: 0,reg_vel_min_1,reg_vel_kurtosis_1,reg_vel_max_1,reg_vel_min_2,reg_vel_kurtosis_2,reg_vel_max_2,reg_vel_min_3,reg_vel_kurtosis_3,reg_vel_max_3,reg_vel_min_4,...,reg_vel_max_34,reg_vel_min_35,reg_vel_kurtosis_35,reg_vel_max_35,reg_vel_min_36,reg_vel_kurtosis_36,reg_vel_max_36,reg_vel_min_37,reg_vel_kurtosis_37,reg_vel_max_37
0,0.027055,35.266575,69.219949,-8.64203e-07,67.755907,71.982461,-2e-06,6.951704,14.87638,-2e-06,...,6.728558,-9.748183e-07,16.349957,22.777222,0.098512,31.918734,22.777222,-1e-06,3.931374,12.730611


In [11]:
microsac_len.transform(data)

Unnamed: 0,microsac_len_max_1,microsac_len_max_2,microsac_len_max_3,microsac_len_max_4,microsac_len_max_5,microsac_len_max_6,microsac_len_max_7,microsac_len_max_8,microsac_len_max_9,microsac_len_max_10,...,microsac_len_max_28,microsac_len_max_29,microsac_len_max_30,microsac_len_max_31,microsac_len_max_32,microsac_len_max_33,microsac_len_max_34,microsac_len_max_35,microsac_len_max_36,microsac_len_max_37
0,0.407159,0.264537,0.326784,0.704554,0.256157,0.409433,0.386975,0.506729,0.336658,0.526767,...,0.669959,0.510668,0.337793,0.604739,0.994827,0.255299,0.615122,0.308905,0.55976,0.41619


In [12]:
microsac_vel.transform(data)

Unnamed: 0,microsac_vel_min_1,microsac_vel_min_2,microsac_vel_min_3,microsac_vel_min_4,microsac_vel_min_5,microsac_vel_min_6,microsac_vel_min_7,microsac_vel_min_8,microsac_vel_min_9,microsac_vel_min_10,...,microsac_vel_min_28,microsac_vel_min_29,microsac_vel_min_30,microsac_vel_min_31,microsac_vel_min_32,microsac_vel_min_33,microsac_vel_min_34,microsac_vel_min_35,microsac_vel_min_36,microsac_vel_min_37
0,0.121675,0.023706,0.102426,-2e-06,0.018103,0.059586,-1e-06,0.048564,-6.945098e-07,-2e-06,...,-2e-06,0.168954,-1e-06,-2e-06,-1.20019e-07,-3.353352e-08,-2e-06,-9.748183e-07,-2e-06,-1e-06


In [13]:
microsac_acc.transform(data)

Unnamed: 0,microsac_acc_max_1,microsac_acc_max_2,microsac_acc_max_3,microsac_acc_max_4,microsac_acc_max_5,microsac_acc_max_6,microsac_acc_max_7,microsac_acc_max_8,microsac_acc_max_9,microsac_acc_max_10,...,microsac_acc_max_28,microsac_acc_max_29,microsac_acc_max_30,microsac_acc_max_31,microsac_acc_max_32,microsac_acc_max_33,microsac_acc_max_34,microsac_acc_max_35,microsac_acc_max_36,microsac_acc_max_37
0,393.225254,354.57246,321.539316,354.693595,392.2326,348.39345,390.17778,374.443031,389.183077,391.160826,...,390.951067,435.086608,393.793036,367.273491,360.393108,395.577717,456.902061,381.923962,407.521188,403.701064


In [14]:
fix_dur.transform(data)

Unnamed: 0,fix_dur_median_1,fix_dur_skew_1,fix_dur_var_1,fix_dur_median_2,fix_dur_skew_2,fix_dur_var_2,fix_dur_median_3,fix_dur_skew_3,fix_dur_var_3,fix_dur_median_4,...,fix_dur_var_34,fix_dur_median_35,fix_dur_skew_35,fix_dur_var_35,fix_dur_median_36,fix_dur_skew_36,fix_dur_var_36,fix_dur_median_37,fix_dur_skew_37,fix_dur_var_37
0,178.2125,1.857757,10509.516414,183.87525,1.284709,10140.758874,195.3575,1.518871,16487.438466,190.0345,...,14172.790904,156.328,3.939154,15912.971667,172.9265,0.790963,9222.189501,175.881,2.152412,10483.595051


In [15]:
reg_count.transform(data)

Unnamed: 0,reg_count_1,reg_count_2,reg_count_3,reg_count_4,reg_count_5,reg_count_6,reg_count_7,reg_count_8,reg_count_9,reg_count_10,...,reg_count_28,reg_count_29,reg_count_30,reg_count_31,reg_count_32,reg_count_33,reg_count_34,reg_count_35,reg_count_36,reg_count_37
0,269,164,152,151,155,173,171,192,218,225,...,130,161,104,86,113,166,137,220,104,135


In [16]:
fix_vad.transform(data)

Unnamed: 0,fix_disp_mean_1,fix_disp_median_1,fix_disp_mean_2,fix_disp_median_2,fix_disp_mean_3,fix_disp_median_3,fix_disp_mean_4,fix_disp_median_4,fix_disp_mean_5,fix_disp_median_5,...,fix_disp_mean_33,fix_disp_median_33,fix_disp_mean_34,fix_disp_median_34,fix_disp_mean_35,fix_disp_median_35,fix_disp_mean_36,fix_disp_median_36,fix_disp_mean_37,fix_disp_median_37
0,1.160423,1.204169,1.171774,1.233559,1.204409,1.263217,1.146726,1.186699,1.196206,1.255999,...,1.275482,1.357264,1.240055,1.305495,1.206376,1.282961,1.208544,1.29094,1.263215,1.362009


In [17]:
extractor.transform(data)

Unnamed: 0,sac_vel_min_1,sac_vel_kurtosis_1,sac_vel_max_1,sac_vel_min_2,sac_vel_kurtosis_2,sac_vel_max_2,sac_vel_min_3,sac_vel_kurtosis_3,sac_vel_max_3,sac_vel_min_4,...,reg_acc_mean_34,reg_acc_min_35,reg_acc_var_35,reg_acc_mean_35,reg_acc_min_36,reg_acc_var_36,reg_acc_mean_36,reg_acc_min_37,reg_acc_var_37,reg_acc_mean_37
0,-4.70974e-07,6.470488,13.717103,-8.64203e-07,1.763947,8.626671,-2e-06,3.589073,10.717298,-2e-06,...,111.247524,1.538127e-12,9415.483648,112.432628,0.330045,9888.632278,106.202628,2.072115e-12,9591.713138,96.316443


In [18]:
sac_len.transform(data).shape, fix_dur.transform(data).shape, sac_vel, extractor.transform(data).shape

((1, 111),
 (1, 111),
 SaccadeVelocity(duration='duration', pk=['tekst'],
                 stats=['min', 'kurtosis', 'max'], t='start_timestamp',
                 x='norm_pos_x', y='norm_pos_y'),
 (1, 999))

In [19]:
from scanpath_dist import get_expected_path

paths = get_expected_path(
    data=data,
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['tekst', 'Participant'],
    duration=dur
)

paths.keys()

ModuleNotFoundError: No module named 'multimatch_gaze'

In [None]:
paths['1']

In [None]:
from scanpath_dist import EucDist

euc = EucDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

euc.expected_paths

In [None]:
euc.fit_transform(data)

In [None]:
from scanpath_dist import get_fill_path

expected_paths = get_expected_path(
    data=data,
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

fill_path = get_fill_path(
    list(expected_paths.values()), 
    x='x_est',
    y='y_est'
)

len(expected_paths)

In [None]:
euc = EucDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    return_df=False
)

euc.fit_transform(data)

In [None]:
from scanpath_dist import HauDist

hau = HauDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

hau.fit_transform(data)

In [None]:
from scanpath_dist import DTWDist

dtw = DTWDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

dtw.fit_transform(data)

In [None]:
from scanpath_dist import ScanMatchDist

query_filter = "0 <= norm_pos_x <= 1 and 0 <= norm_pos_y <= 1"
data = data.query(query_filter)

scan_match = ScanMatchDist(
    x=x,
    y=y,
    duration=dur,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

scan_match.fit_transform(data)