In [26]:
import pandas as pd
import numpy as np

from stats import SaccadeLength, SaccadeAcceleration, FixationDuration, SaccadeVelocity, FixationVAD, RegressionCount
from extractor import Extractor

In [27]:
data = pd.concat([pd.read_excel('itog_fix_1.xlsx'), pd.read_excel('itog_fix_2.xlsx')], axis=0)

In [28]:
x = 'norm_pos_x'
y = 'norm_pos_y'
t = 'start_timestamp'
dur = 'duration'
dis = 'dispersion'

features = [
        SaccadeVelocity(stats=['mean', 'kurtosis', 'max']),
        FixationDuration(stats=['median', 'skew', 'var']),
        FixationVAD(stats=['mean', 'median']),
        SaccadeLength(stats=['var', 'std']),
        RegressionCount(),
        SaccadeAcceleration(stats=['min', 'var', 'mean'])
    ]
sac_vel = SaccadeVelocity(stats=['mean', 'kurtosis', 'max'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
fix_dur = FixationDuration(stats=['median', 'skew', 'var'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
reg_count = RegressionCount(x=x, y=y, t=t, pk=['tekst'])
fix_vad = FixationVAD(stats=['mean', 'median'], x=x, y=y, t=t, duration=dur,  dispersion=dis, pk=['tekst'])
sac_len = SaccadeLength(stats=['var', 'std'], x=x, y=y, t=t, duration=dur, pk=['tekst'])
sac_acc = SaccadeAcceleration(stats=['min', 'var', 'mean'], x=x, y=y, t=t, duration=dur, pk=['tekst'])

extractor = Extractor(features=features, x=x, y=y, t=t, duration=dur, dispersion=dis, pk=['tekst'],
                      return_df=True)

In [29]:
sac_len.transform(data)

Unnamed: 0,sac_len_var_1,sac_len_std_1,sac_len_var_2,sac_len_std_2,sac_len_var_3,sac_len_std_3,sac_len_var_4,sac_len_std_4,sac_len_var_5,sac_len_std_5,...,sac_len_var_33,sac_len_std_33,sac_len_var_34,sac_len_std_34,sac_len_var_35,sac_len_std_35,sac_len_var_36,sac_len_std_36,sac_len_var_37,sac_len_std_37
0,0.0086,0.092735,0.006856,0.0828,0.008787,0.093741,0.00875,0.093543,0.006019,0.077581,...,0.004574,0.067629,0.006926,0.08322,0.018313,0.135327,0.01331,0.115368,0.013424,0.115863


In [30]:
sac_acc.transform(data)

Unnamed: 0,sac_acc_min_1,sac_acc_var_1,sac_acc_mean_1,sac_acc_min_2,sac_acc_var_2,sac_acc_mean_2,sac_acc_min_3,sac_acc_var_3,sac_acc_mean_3,sac_acc_min_4,...,sac_acc_mean_34,sac_acc_min_35,sac_acc_var_35,sac_acc_mean_35,sac_acc_min_36,sac_acc_var_36,sac_acc_mean_36,sac_acc_min_37,sac_acc_var_37,sac_acc_mean_37
0,7.437532e-13,7096.232068,92.712374,1.364786e-12,6735.116723,82.582251,2.86006e-12,4745.36048,83.83454,3.514137e-12,...,102.881657,1.538127e-12,14091.651674,110.523075,2.508151e-12,12305.666053,112.434921,2.072115e-12,7886.442632,90.731849


In [31]:
fix_dur.transform(data)

Unnamed: 0,fix_dur_median_1,fix_dur_skew_1,fix_dur_var_1,fix_dur_median_2,fix_dur_skew_2,fix_dur_var_2,fix_dur_median_3,fix_dur_skew_3,fix_dur_var_3,fix_dur_median_4,...,fix_dur_var_34,fix_dur_median_35,fix_dur_skew_35,fix_dur_var_35,fix_dur_median_36,fix_dur_skew_36,fix_dur_var_36,fix_dur_median_37,fix_dur_skew_37,fix_dur_var_37
0,178.2125,1.857757,10509.516414,183.87525,1.284709,10140.758874,195.3575,1.518871,16487.438466,190.0345,...,14172.790904,156.328,3.939154,15912.971667,172.9265,0.790963,9222.189501,175.881,2.152412,10483.595051


In [32]:
sac_vel.transform(data)

Unnamed: 0,sac_vel_mean_1,sac_vel_kurtosis_1,sac_vel_max_1,sac_vel_mean_2,sac_vel_kurtosis_2,sac_vel_max_2,sac_vel_mean_3,sac_vel_kurtosis_3,sac_vel_max_3,sac_vel_mean_4,...,sac_vel_max_34,sac_vel_mean_35,sac_vel_kurtosis_35,sac_vel_max_35,sac_vel_mean_36,sac_vel_kurtosis_36,sac_vel_max_36,sac_vel_mean_37,sac_vel_kurtosis_37,sac_vel_max_37
0,3.002532,6.470488,13.717103,2.703573,1.763947,8.626671,2.830928,3.589073,10.717298,2.802656,...,6.728558,3.044436,37.115046,30.362497,3.032234,30.10733,22.777222,2.870006,6.351163,12.730611


In [33]:
reg_count.transform(data)

Unnamed: 0,reg_count_1,reg_count_2,reg_count_3,reg_count_4,reg_count_5,reg_count_6,reg_count_7,reg_count_8,reg_count_9,reg_count_10,...,reg_count_28,reg_count_29,reg_count_30,reg_count_31,reg_count_32,reg_count_33,reg_count_34,reg_count_35,reg_count_36,reg_count_37
0,269,164,152,151,155,173,171,192,218,225,...,130,161,104,86,113,166,137,220,104,135


In [34]:
fix_vad.transform(data)

Unnamed: 0,fix_disp_mean_1,fix_disp_median_1,fix_disp_mean_2,fix_disp_median_2,fix_disp_mean_3,fix_disp_median_3,fix_disp_mean_4,fix_disp_median_4,fix_disp_mean_5,fix_disp_median_5,...,fix_disp_mean_33,fix_disp_median_33,fix_disp_mean_34,fix_disp_median_34,fix_disp_mean_35,fix_disp_median_35,fix_disp_mean_36,fix_disp_median_36,fix_disp_mean_37,fix_disp_median_37
0,1.160423,1.204169,1.171774,1.233559,1.204409,1.263217,1.146726,1.186699,1.196206,1.255999,...,1.275482,1.357264,1.240055,1.305495,1.206376,1.282961,1.208544,1.29094,1.263215,1.362009


In [35]:
extractor.transform(data)

Unnamed: 0,sac_vel_mean_1,sac_vel_kurtosis_1,sac_vel_max_1,sac_vel_mean_2,sac_vel_kurtosis_2,sac_vel_max_2,sac_vel_mean_3,sac_vel_kurtosis_3,sac_vel_max_3,sac_vel_mean_4,...,sac_acc_mean_34,sac_acc_min_35,sac_acc_var_35,sac_acc_mean_35,sac_acc_min_36,sac_acc_var_36,sac_acc_mean_36,sac_acc_min_37,sac_acc_var_37,sac_acc_mean_37
0,3.002532,6.470488,13.717103,2.703573,1.763947,8.626671,2.830928,3.589073,10.717298,2.802656,...,102.881657,1.538127e-12,14091.651674,110.523075,2.508151e-12,12305.666053,112.434921,2.072115e-12,7886.442632,90.731849


In [36]:
sac_len.transform(data).shape, fix_dur.transform(data).shape, sac_vel, extractor.transform(data).shape

((1, 74),
 (1, 111),
 SaccadeVelocity(duration='duration', pk=['tekst'],
                 stats=['mean', 'kurtosis', 'max'], t='start_timestamp',
                 x='norm_pos_x', y='norm_pos_y'),
 (1, 518))

In [37]:
from scanpath_dist import get_expected_path

paths = get_expected_path(
    data=data,
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['tekst', 'Participant'],
    duration=dur
)

paths.keys()

dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37'])

In [38]:
paths['1']

Unnamed: 0,x_est,y_est,duration_est
0,0.259898,0.500364,206.67675
1,0.249939,0.471984,167.37850
2,0.245635,0.460574,151.21150
3,0.240511,0.528885,186.05625
4,0.266589,0.529640,278.93000
...,...,...,...
166,0.460267,-0.072699,228.13350
167,0.461565,-0.063056,79.41750
168,0.334058,0.265421,131.70250
169,0.334674,0.273724,120.10650


In [39]:
from scanpath_dist import EucDist

euc = EucDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

euc.expected_paths

In [40]:
euc.fit_transform(data)

Unnamed: 0,euc_1_1,euc_1_2,euc_1_3,euc_1_4,euc_1_5,euc_1_6,euc_1_7,euc_1_8,euc_1_9,euc_1_10,...,euc_2_28,euc_2_29,euc_2_30,euc_2_31,euc_2_32,euc_2_33,euc_2_34,euc_2_35,euc_2_36,euc_2_37
0,3.048825,0.712929,0.756424,1.209765,1.005537,2.898817,2.672203,1.193188,1.233929,0.555947,...,10.925293,7.213775,4.191334,18.768847,1.690434,0.467097,1.311675,15.454267,9.711279,12.413386


In [41]:
from scanpath_dist import get_fill_path

expected_paths = get_expected_path(
    data=data,
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

fill_path = get_fill_path(
    list(expected_paths.values()), 
    x='x_est',
    y='y_est'
)

len(expected_paths)

37

In [42]:
euc = EucDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    return_df=False
)

euc.fit_transform(data)

array([[3.04882529e+00, 7.12929416e-01, 7.56424234e-01, 1.20976543e+00,
        1.00553738e+00, 2.89881665e+00, 2.67220346e+00, 1.19318825e+00,
        1.23392888e+00, 5.55946830e-01, 9.22253175e-01, 3.64479471e+03,
        2.16927370e+00, 1.87264471e+00, 1.54378723e+00, 2.08852628e+00,
        3.16528819e+00, 2.15572139e+00, 2.21280779e+00, 3.93178073e+02,
        7.81166543e+00, 5.04486619e+00, 6.97104208e+00, 1.02846197e+01,
        1.87397774e+00, 1.32104730e+00, 4.12898908e+00, 1.09252934e+01,
        7.21377495e+00, 4.19133394e+00, 1.87688466e+01, 1.69043359e+00,
        4.67097332e-01, 1.31167488e+00, 1.54542668e+01, 9.71127921e+00,
        1.24133861e+01, 3.04882529e+00, 7.12929416e-01, 7.56424234e-01,
        1.20976543e+00, 1.00553738e+00, 2.89881665e+00, 2.67220346e+00,
        1.19318825e+00, 1.23392888e+00, 5.55946830e-01, 9.22253175e-01,
        3.64479471e+03, 2.16927370e+00, 1.87264471e+00, 1.54378723e+00,
        2.08852628e+00, 3.16528819e+00, 2.15572139e+00, 2.212807

In [43]:
from scanpath_dist import HauDist

hau = HauDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

hau.fit_transform(data)

Unnamed: 0,hau_1_1,hau_1_2,hau_1_3,hau_1_4,hau_1_5,hau_1_6,hau_1_7,hau_1_8,hau_1_9,hau_1_10,...,hau_2_28,hau_2_29,hau_2_30,hau_2_31,hau_2_32,hau_2_33,hau_2_34,hau_2_35,hau_2_36,hau_2_37
0,0.194536,0.063802,0.066652,0.092369,0.10171,0.279688,0.138929,0.094807,0.147317,0.076706,...,2.255686,0.133862,0.321899,3.745765,0.116783,0.042363,0.23945,0.9164,0.755607,0.520875


In [44]:
from scanpath_dist import DTWDist

dtw = DTWDist(
    x=x,
    y=y,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

dtw.fit_transform(data)

Unnamed: 0,dtw_1_1,dtw_1_2,dtw_1_3,dtw_1_4,dtw_1_5,dtw_1_6,dtw_1_7,dtw_1_8,dtw_1_9,dtw_1_10,...,dtw_2_28,dtw_2_29,dtw_2_30,dtw_2_31,dtw_2_32,dtw_2_33,dtw_2_34,dtw_2_35,dtw_2_36,dtw_2_37
0,1.787453,0.510791,0.516659,0.832287,0.643137,5.604751,1.211802,0.637592,1.22664,0.382113,...,7.651892,1.219793,1.75252,18.448703,1.104477,0.280697,2.352949,19.138715,2.065866,5.525813


In [45]:
from scanpath_dist import ScanMatchDist

query_filter = "0 <= norm_pos_x <= 1 and 0 <= norm_pos_y <= 1"
data = data.query(query_filter)

scan_match = ScanMatchDist(
    x=x,
    y=y,
    duration=dur,
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

scan_match.fit_transform(data)

Unnamed: 0,sm_1_1,sm_1_2,sm_1_3,sm_1_4,sm_1_5,sm_1_6,sm_1_7,sm_1_8,sm_1_9,sm_1_10,...,sm_2_28,sm_2_29,sm_2_30,sm_2_31,sm_2_32,sm_2_33,sm_2_34,sm_2_35,sm_2_36,sm_2_37
0,66.0,37.0,46.0,39.0,24.0,56.0,40.0,40.0,59.0,16.0,...,70.0,37.0,55.0,40.0,29.0,30.0,99.0,119.0,44.0,64.0


In [46]:
from scanpath_dist import MannanDist

man = MannanDist(
    x='norm_pos_x',
    y='norm_pos_y',
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

man.fit_transform(data)

Unnamed: 0,man_1_1,man_1_2,man_1_3,man_1_4,man_1_5,man_1_6,man_1_7,man_1_8,man_1_9,man_1_10,...,man_2_28,man_2_29,man_2_30,man_2_31,man_2_32,man_2_33,man_2_34,man_2_35,man_2_36,man_2_37
0,0.030314,0.010513,0.015061,0.041748,0.02648,0.055122,0.033677,0.019075,0.025434,0.007313,...,0.11203,0.041382,0.074798,0.214245,0.058474,0.007162,0.058584,0.064088,0.061682,0.088011


In [47]:
from scanpath_dist import EyeAnalysisDist

ea = EyeAnalysisDist(
    x='norm_pos_x',
    y='norm_pos_y',
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

ea.fit_transform(data)

Unnamed: 0,ea_1_1,ea_1_2,ea_1_3,ea_1_4,ea_1_5,ea_1_6,ea_1_7,ea_1_8,ea_1_9,ea_1_10,...,ea_2_28,ea_2_29,ea_2_30,ea_2_31,ea_2_32,ea_2_33,ea_2_34,ea_2_35,ea_2_36,ea_2_37
0,0.003514,0.000441,0.000893,0.006898,0.002787,0.010185,0.004523,0.001455,0.002419,0.000214,...,0.048731,0.006725,0.012079,0.181125,0.013591,0.000205,0.009781,0.013502,0.013838,0.022915


In [48]:
from scanpath_dist import DFDist

df = DFDist(
    x='norm_pos_x',
    y='norm_pos_y',
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

df.fit_transform(data)

Unnamed: 0,df_1_1,df_1_2,df_1_3,df_1_4,df_1_5,df_1_6,df_1_7,df_1_8,df_1_9,df_1_10,...,df_2_28,df_2_29,df_2_30,df_2_31,df_2_32,df_2_33,df_2_34,df_2_35,df_2_36,df_2_37
0,0.065187,0.025128,0.028384,0.121818,0.044789,0.099986,0.072004,0.037141,0.04889,0.016119,...,4.294505,0.081079,0.056668,13.550874,1.298157,0.019605,0.347299,0.419847,0.16032,0.090202


In [49]:
from scanpath_dist import TDEDist

tde = TDEDist(
    k=10,
    x='norm_pos_x',
    y='norm_pos_y',
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    expected_paths=expected_paths,
    fill_path=fill_path,
    return_df=True
)

tde.fit_transform(data)

Unnamed: 0,tde_1_1,tde_1_2,tde_1_3,tde_1_4,tde_1_5,tde_1_6,tde_1_7,tde_1_8,tde_1_9,tde_1_10,...,tde_2_28,tde_2_29,tde_2_30,tde_2_31,tde_2_32,tde_2_33,tde_2_34,tde_2_35,tde_2_36,tde_2_37
0,0.502217,0.394022,0.442141,0.349533,0.372233,0.663066,0.444254,0.231162,0.278434,0.394892,...,0.867568,0.5728,1.392103,2.418899,0.714634,0.278778,0.688562,0.875638,0.476799,0.503963


In [50]:
from scanpath_dist import MultiMatchDist

mm = MultiMatchDist(
    x='norm_pos_x',
    y='norm_pos_y',
    duration='duration',
    path_pk=['tekst'],
    pk=['Participant', 'tekst'],
    return_df=True
)

mm.fit_transform(data)

Unnamed: 0,mm_shape_1_1,mm_angle_1_1,mm_len_1_1,mm_pos_1_1,mm_duration_1_1,mm_shape_1_2,mm_angle_1_2,mm_len_1_2,mm_pos_1_2,mm_duration_1_2,...,mm_shape_2_36,mm_angle_2_36,mm_len_2_36,mm_pos_2_36,mm_duration_2_36,mm_shape_2_37,mm_angle_2_37,mm_len_2_37,mm_pos_2_37,mm_duration_2_37
0,0.987139,0.930205,0.982722,0.918791,0.790491,0.994941,0.955519,0.996965,0.968464,0.879447,...,0.985054,0.723779,0.987757,0.811322,0.692067,0.984104,0.685248,0.983233,0.706524,0.670549
