In [1]:
import pandas as pd
import numpy as np

from stats import SaccadeLength, SaccadeAcceleration, FixationDuration, SaccadeVelocity, FixationVAD, RegressionCount, MicroSaccadeLength, MicroSaccadeVelocity, MicroSaccadeAcceleration
from measures import HurstExponent
from extractor import Extractor

In [2]:
data = pd.concat([pd.read_excel('itog_fix_1.xlsx'), pd.read_excel('itog_fix_2.xlsx')], axis=0)

In [3]:
x = 'norm_pos_x'
y = 'norm_pos_y'
t = 'start_timestamp'
dur = 'duration'
dis = 'dispersion'
msv = 4.7  # MS_VELOCITY_THRESHOLD
msa = 1.2  # MS_AMPLITUDE_THRESHOLD

features = [
    SaccadeVelocity(stats=['min', 'kurtosis', 'max']),
    FixationDuration(stats=['median', 'skew', 'var']),
    FixationVAD(stats=['mean', 'median']),
    SaccadeLength(stats=['var', 'std']),
    RegressionCount(),
    SaccadeAcceleration(stats=['min', 'var', 'mean']),
    MicroSaccadeVelocity(stats=['min'], min_dispersion=msa, max_velocity=msv),
    MicroSaccadeAcceleration(stats=['var'], min_dispersion=msa, max_velocity=msv),
    MicroSaccadeLength(stats=['max'], min_dispersion=msa, max_velocity=msv),
    HurstExponent(var='duration', n_iters=10, fill_strategy='reduce')
]
sac_vel = SaccadeVelocity(stats=['min', 'kurtosis', 'max'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
fix_dur = FixationDuration(stats=['median', 'skew', 'var'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
reg_count = RegressionCount(x=x, y=y, t=t, pk=['tekst'])
fix_vad = FixationVAD(stats=['mean', 'median'], x=x, y=y, t=t, duration=dur,  dispersion=dis, pk=['tekst'])
sac_len = SaccadeLength(stats=['var', 'std'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
sac_acc = SaccadeAcceleration(stats=['min', 'var', 'mean'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
microsac_vel = MicroSaccadeVelocity(stats=['min'], x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'], min_dispersion=msa, max_velocity=msv)
microsac_len = MicroSaccadeLength(stats=['max'], x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'], min_dispersion=msa, max_velocity=msv)
microsac_acc = MicroSaccadeAcceleration(stats=['max'], x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'], min_dispersion=msa, max_velocity=msv)

he = HurstExponent(var='duration', n_iters=10, fill_strategy='reduce', pk=['tekst'])

extractor = Extractor(features=features, x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'],
                      return_df=True)

In [4]:
sac_len.transform(data)

Unnamed: 0,sac_len_var_1,sac_len_std_1,sac_len_var_2,sac_len_std_2,sac_len_var_3,sac_len_std_3,sac_len_var_4,sac_len_std_4,sac_len_var_5,sac_len_std_5,...,sac_len_var_33,sac_len_std_33,sac_len_var_34,sac_len_std_34,sac_len_var_35,sac_len_std_35,sac_len_var_36,sac_len_std_36,sac_len_var_37,sac_len_std_37
0,0.0086,0.092735,0.006856,0.0828,0.008787,0.093741,0.00875,0.093543,0.006019,0.077581,...,0.004574,0.067629,0.006926,0.08322,0.018313,0.135327,0.01331,0.115368,0.013424,0.115863


In [5]:
sac_acc.transform(data)

Unnamed: 0,sac_acc_min_1,sac_acc_var_1,sac_acc_mean_1,sac_acc_min_2,sac_acc_var_2,sac_acc_mean_2,sac_acc_min_3,sac_acc_var_3,sac_acc_mean_3,sac_acc_min_4,...,sac_acc_mean_34,sac_acc_min_35,sac_acc_var_35,sac_acc_mean_35,sac_acc_min_36,sac_acc_var_36,sac_acc_mean_36,sac_acc_min_37,sac_acc_var_37,sac_acc_mean_37
0,7.437532e-13,7096.232068,92.712374,1.364786e-12,6735.116723,82.582251,2.86006e-12,4745.36048,83.83454,3.514137e-12,...,102.881657,1.538127e-12,14091.651674,110.523075,2.508151e-12,12305.666053,112.434921,2.072115e-12,7886.442632,90.731849


In [6]:
sac_vel.transform(data)

Unnamed: 0,sac_vel_min_1,sac_vel_kurtosis_1,sac_vel_max_1,sac_vel_min_2,sac_vel_kurtosis_2,sac_vel_max_2,sac_vel_min_3,sac_vel_kurtosis_3,sac_vel_max_3,sac_vel_min_4,...,sac_vel_max_34,sac_vel_min_35,sac_vel_kurtosis_35,sac_vel_max_35,sac_vel_min_36,sac_vel_kurtosis_36,sac_vel_max_36,sac_vel_min_37,sac_vel_kurtosis_37,sac_vel_max_37
0,-4.70974e-07,6.470488,13.717103,-8.64203e-07,1.763947,8.626671,-2e-06,3.589073,10.717298,-2e-06,...,6.728558,-9.748183e-07,37.115046,30.362497,-2e-06,30.10733,22.777222,-1e-06,6.351163,12.730611


In [7]:
microsac_len.transform(data)

Unnamed: 0,microsac_len_max_1,microsac_len_max_2,microsac_len_max_3,microsac_len_max_4,microsac_len_max_5,microsac_len_max_6,microsac_len_max_7,microsac_len_max_8,microsac_len_max_9,microsac_len_max_10,...,microsac_len_max_28,microsac_len_max_29,microsac_len_max_30,microsac_len_max_31,microsac_len_max_32,microsac_len_max_33,microsac_len_max_34,microsac_len_max_35,microsac_len_max_36,microsac_len_max_37
0,0.407159,0.264537,0.326784,0.704554,0.256157,0.409433,0.386975,0.506729,0.336658,0.526767,...,0.669959,0.510668,0.337793,0.604739,0.994827,0.255299,0.615122,0.308905,0.55976,0.41619


In [8]:
microsac_vel.transform(data)

Unnamed: 0,microsac_vel_min_1,microsac_vel_min_2,microsac_vel_min_3,microsac_vel_min_4,microsac_vel_min_5,microsac_vel_min_6,microsac_vel_min_7,microsac_vel_min_8,microsac_vel_min_9,microsac_vel_min_10,...,microsac_vel_min_28,microsac_vel_min_29,microsac_vel_min_30,microsac_vel_min_31,microsac_vel_min_32,microsac_vel_min_33,microsac_vel_min_34,microsac_vel_min_35,microsac_vel_min_36,microsac_vel_min_37
0,0.121675,0.023706,0.102426,-2e-06,0.018103,0.059586,-1e-06,0.048564,-6.945098e-07,-2e-06,...,-2e-06,0.168954,-1e-06,-2e-06,-1.20019e-07,-3.353352e-08,-2e-06,-9.748183e-07,-2e-06,-1e-06


In [9]:
microsac_acc.transform(data)

Unnamed: 0,microsac_acc_max_1,microsac_acc_max_2,microsac_acc_max_3,microsac_acc_max_4,microsac_acc_max_5,microsac_acc_max_6,microsac_acc_max_7,microsac_acc_max_8,microsac_acc_max_9,microsac_acc_max_10,...,microsac_acc_max_28,microsac_acc_max_29,microsac_acc_max_30,microsac_acc_max_31,microsac_acc_max_32,microsac_acc_max_33,microsac_acc_max_34,microsac_acc_max_35,microsac_acc_max_36,microsac_acc_max_37
0,393.225254,354.57246,321.539316,354.693595,392.2326,348.39345,390.17778,374.443031,389.183077,391.160826,...,390.951067,435.086608,393.793036,367.273491,360.393108,395.577717,456.902061,381.923962,407.521188,403.701064


In [10]:
fix_dur.transform(data)

Unnamed: 0,fix_dur_median_1,fix_dur_skew_1,fix_dur_var_1,fix_dur_median_2,fix_dur_skew_2,fix_dur_var_2,fix_dur_median_3,fix_dur_skew_3,fix_dur_var_3,fix_dur_median_4,...,fix_dur_var_34,fix_dur_median_35,fix_dur_skew_35,fix_dur_var_35,fix_dur_median_36,fix_dur_skew_36,fix_dur_var_36,fix_dur_median_37,fix_dur_skew_37,fix_dur_var_37
0,178.2125,1.857757,10509.516414,183.87525,1.284709,10140.758874,195.3575,1.518871,16487.438466,190.0345,...,14172.790904,156.328,3.939154,15912.971667,172.9265,0.790963,9222.189501,175.881,2.152412,10483.595051


In [11]:
reg_count.transform(data)

Unnamed: 0,reg_count_1,reg_count_2,reg_count_3,reg_count_4,reg_count_5,reg_count_6,reg_count_7,reg_count_8,reg_count_9,reg_count_10,...,reg_count_28,reg_count_29,reg_count_30,reg_count_31,reg_count_32,reg_count_33,reg_count_34,reg_count_35,reg_count_36,reg_count_37
0,269,164,152,151,155,173,171,192,218,225,...,130,161,104,86,113,166,137,220,104,135


In [12]:
fix_vad.transform(data)

Unnamed: 0,fix_disp_mean_1,fix_disp_median_1,fix_disp_mean_2,fix_disp_median_2,fix_disp_mean_3,fix_disp_median_3,fix_disp_mean_4,fix_disp_median_4,fix_disp_mean_5,fix_disp_median_5,...,fix_disp_mean_33,fix_disp_median_33,fix_disp_mean_34,fix_disp_median_34,fix_disp_mean_35,fix_disp_median_35,fix_disp_mean_36,fix_disp_median_36,fix_disp_mean_37,fix_disp_median_37
0,1.160423,1.204169,1.171774,1.233559,1.204409,1.263217,1.146726,1.186699,1.196206,1.255999,...,1.275482,1.357264,1.240055,1.305495,1.206376,1.282961,1.208544,1.29094,1.263215,1.362009


In [13]:
extractor.transform(data)

Unnamed: 0,sac_vel_min_1,sac_vel_kurtosis_1,sac_vel_max_1,sac_vel_min_2,sac_vel_kurtosis_2,sac_vel_max_2,sac_vel_min_3,sac_vel_kurtosis_3,sac_vel_max_3,sac_vel_min_4,...,hurst_exp_28,hurst_exp_29,hurst_exp_30,hurst_exp_31,hurst_exp_32,hurst_exp_33,hurst_exp_34,hurst_exp_35,hurst_exp_36,hurst_exp_37
0,-4.70974e-07,6.470488,13.717103,-8.64203e-07,1.763947,8.626671,-2e-06,3.589073,10.717298,-2e-06,...,0.007859,0.011344,0.019138,0.018031,0.01548,0.016096,0.016636,0.008879,0.018251,0.015218


In [14]:
he.transform(data)

Unnamed: 0,hurst_exp_1,hurst_exp_2,hurst_exp_3,hurst_exp_4,hurst_exp_5,hurst_exp_6,hurst_exp_7,hurst_exp_8,hurst_exp_9,hurst_exp_10,...,hurst_exp_28,hurst_exp_29,hurst_exp_30,hurst_exp_31,hurst_exp_32,hurst_exp_33,hurst_exp_34,hurst_exp_35,hurst_exp_36,hurst_exp_37
0,0.010769,0.012475,0.017373,0.017302,0.014782,0.01727,0.016498,0.016379,0.015217,0.009344,...,0.007859,0.011344,0.019138,0.018031,0.01548,0.016096,0.016636,0.008879,0.018251,0.015218


In [15]:
sac_len.transform(data).shape, fix_dur.transform(data).shape, sac_vel, extractor.transform(data).shape

((1, 74),
 (1, 111),
 SaccadeVelocity(duration='duration', pk=['tekst'],
                 stats=['min', 'kurtosis', 'max'], t='start_timestamp',
                 x='norm_pos_x', y='norm_pos_y'),
 (1, 666))

In [16]:
from scanpath_dist import get_expected_path

paths = get_expected_path(
    data=data,
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['tekst', 'Participant'],
    duration=dur
)

paths.keys()

dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37'])

In [17]:
paths['1']

Unnamed: 0,x_est,y_est,duration_est
0,0.259898,0.500364,206.67675
1,0.249939,0.471984,167.37850
2,0.245635,0.460574,151.21150
3,0.240511,0.528885,186.05625
4,0.266589,0.529640,278.93000
...,...,...,...
166,0.460267,-0.072699,228.13350
167,0.461565,-0.063056,79.41750
168,0.334058,0.265421,131.70250
169,0.334674,0.273724,120.10650


In [18]:
from scanpath_dist import EucDist

euc = EucDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

euc.expected_paths

In [19]:
euc.fit_transform(data)

Unnamed: 0,euc_1_1,euc_1_2,euc_1_3,euc_1_4,euc_1_5,euc_1_6,euc_1_7,euc_1_8,euc_1_9,euc_1_10,...,euc_2_28,euc_2_29,euc_2_30,euc_2_31,euc_2_32,euc_2_33,euc_2_34,euc_2_35,euc_2_36,euc_2_37
0,3.048825,0.712929,0.756424,1.209765,1.005537,2.898817,2.672203,1.193188,1.233929,0.555947,...,10.925293,7.213775,4.191334,18.768847,1.690434,0.467097,1.311675,15.454267,9.711279,12.413386


In [20]:
from scanpath_dist import get_fill_path

expected_paths = get_expected_path(
    data=data,
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

fill_path = get_fill_path(
    list(expected_paths.values()), 
    x='x_est',
    y='y_est'
)

len(expected_paths)

37

In [21]:
euc = EucDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    return_df=False
)

euc.fit_transform(data)

array([[3.04882529e+00, 7.12929416e-01, 7.56424234e-01, 1.20976543e+00,
        1.00553738e+00, 2.89881665e+00, 2.67220346e+00, 1.19318825e+00,
        1.23392888e+00, 5.55946830e-01, 9.22253175e-01, 3.64479471e+03,
        2.16927370e+00, 1.87264471e+00, 1.54378723e+00, 2.08852628e+00,
        3.16528819e+00, 2.15572139e+00, 2.21280779e+00, 3.93178073e+02,
        7.81166543e+00, 5.04486619e+00, 6.97104208e+00, 1.02846197e+01,
        1.87397774e+00, 1.32104730e+00, 4.12898908e+00, 1.09252934e+01,
        7.21377495e+00, 4.19133394e+00, 1.87688466e+01, 1.69043359e+00,
        4.67097332e-01, 1.31167488e+00, 1.54542668e+01, 9.71127921e+00,
        1.24133861e+01, 3.04882529e+00, 7.12929416e-01, 7.56424234e-01,
        1.20976543e+00, 1.00553738e+00, 2.89881665e+00, 2.67220346e+00,
        1.19318825e+00, 1.23392888e+00, 5.55946830e-01, 9.22253175e-01,
        3.64479471e+03, 2.16927370e+00, 1.87264471e+00, 1.54378723e+00,
        2.08852628e+00, 3.16528819e+00, 2.15572139e+00, 2.212807

In [22]:
from scanpath_dist import HauDist

hau = HauDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

hau.fit_transform(data)

Unnamed: 0,hau_1_1,hau_1_2,hau_1_3,hau_1_4,hau_1_5,hau_1_6,hau_1_7,hau_1_8,hau_1_9,hau_1_10,...,hau_2_28,hau_2_29,hau_2_30,hau_2_31,hau_2_32,hau_2_33,hau_2_34,hau_2_35,hau_2_36,hau_2_37
0,0.194536,0.063802,0.066652,0.092369,0.10171,0.279688,0.138929,0.094807,0.147317,0.076706,...,2.255686,0.133862,0.321899,3.745765,0.116783,0.042363,0.23945,0.9164,0.755607,0.520875


In [23]:
from scanpath_dist import DTWDist

dtw = DTWDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

dtw.fit_transform(data)

Unnamed: 0,dtw_1_1,dtw_1_2,dtw_1_3,dtw_1_4,dtw_1_5,dtw_1_6,dtw_1_7,dtw_1_8,dtw_1_9,dtw_1_10,...,dtw_2_28,dtw_2_29,dtw_2_30,dtw_2_31,dtw_2_32,dtw_2_33,dtw_2_34,dtw_2_35,dtw_2_36,dtw_2_37
0,1.787453,0.510791,0.516659,0.832287,0.643137,5.604751,1.211802,0.637592,1.22664,0.382113,...,7.651892,1.219793,1.75252,18.448703,1.104477,0.280697,2.352949,19.138715,2.065866,5.525813


In [24]:
from scanpath_dist import ScanMatchDist

query_filter = "0 <= norm_pos_x <= 1 and 0 <= norm_pos_y <= 1"
data = data.query(query_filter)

scan_match = ScanMatchDist(
    x=x,
    y=y,
    duration=dur,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

scan_match.fit_transform(data)

Unnamed: 0,sm_1_1,sm_1_2,sm_1_3,sm_1_4,sm_1_5,sm_1_6,sm_1_7,sm_1_8,sm_1_9,sm_1_10,...,sm_2_28,sm_2_29,sm_2_30,sm_2_31,sm_2_32,sm_2_33,sm_2_34,sm_2_35,sm_2_36,sm_2_37
0,66.0,37.0,46.0,39.0,24.0,56.0,40.0,40.0,59.0,16.0,...,70.0,37.0,55.0,40.0,29.0,30.0,99.0,119.0,44.0,64.0
