In [1]:
import pandas as pd
import numpy as np

from os.path import join
from stats import SaccadeFeatures, FixationFeatures, RegressionFeatures, MicroSaccades
from extractor import Extractor

# Folder with the sample spreadsheets read below; the path is relative
# (one level up, then 'test_data'), so it resolves against the working directory.
DATA_PATH = join('..', 'test_data')

In [2]:
# Read both sample spreadsheets and stack them row-wise into a single frame.
# NOTE(review): ignore_index is not set, so each file keeps its own row labels
# and labels may repeat across the two files -- confirm this is intended, since
# the AOI assignment that follows addresses rows by index label.
fixation_files = ('itog_fix_1.xlsx', 'itog_fix_2.xlsx')
fixation_frames = [pd.read_excel(join(DATA_PATH, file_name))
                   for file_name in fixation_files]
data = pd.concat(fixation_frames, axis=0)

In [3]:
# Synthetic AOI labelling for the demo: every row starts as 'aoi_1', then the
# index labels found at every 5th, 6th and 7th position are switched to 'aoi_2'.
data['AOI'] = 'aoi_1'

for step in (5, 6, 7):
    # Selection stays label-based: every row carrying one of the sampled index
    # labels is relabelled (the same rows a per-label ``.loc[label]`` write
    # touches), but with one vectorised assignment per stride instead of one
    # scalar write per label.
    sampled_labels = data.index[::step]
    data.loc[data.index.isin(sampled_labels), 'AOI'] = 'aoi_2'

In [4]:
# Sanity check: number of rows assigned to each synthetic AOI label.
aoi_counts = data['AOI'].value_counts()
aoi_counts

AOI
aoi_2    5304
aoi_1    3233
Name: count, dtype: int64

In [5]:
# Names of the input-frame columns handed to the feature extractors.
x, y = 'norm_pos_x', 'norm_pos_y'
t = 'start_timestamp'
dur, dis = 'duration', 'dispersion'


# Statistics requested for each saccade-type feature (feature name -> stat names).
sac_feats_stats = dict(
    length=['min', 'max'],
    speed=['mean', 'kurtosis'],
    acceleration=['mean'],
)
# Feature/statistic pairs passed as ``shift_features`` to the extractors.
sac_feats_stats_shift = dict(length=['max'], acceleration=['mean'])

# Stand-alone extractors: each one receives its own column mapping and keys.

# Saccade statistics; no ``aoi`` column is passed to this one.
sf = SaccadeFeatures(
    x=x, y=y, t=t,
    pk=['Participant', 'tekst'],
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['tekst'],
)


# Statistics requested for each fixation feature (feature name -> stat names).
fix_feats_stats = dict(duration=['sum'], vad=['mean'])

# NOTE(review): ``shift_pk`` is given without ``shift_features`` here --
# confirm that this is intended.
fx = FixationFeatures(
    duration=dur, dispersion=dis,
    pk=['Participant', 'tekst'],
    aoi='AOI',
    features_stats=fix_feats_stats,
    shift_pk=['Participant'],
)

# Regression statistics split by AOI.
# NOTE(review): the meaning of ``rule=(1, 3)`` is defined by RegressionFeatures
# and is not visible here -- TODO confirm.
rg = RegressionFeatures(
    x=x, y=y, t=t, duration=dur,
    pk=['Participant', 'tekst'],
    aoi='AOI',
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['tekst'],
    rule=(1, 3),
)

# Micro-saccade statistics split by AOI, with explicit dispersion/speed settings.
ms = MicroSaccades(
    x=x, y=y, t=t, dispersion=dis,
    pk=['Participant', 'tekst'],
    aoi='AOI',
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['tekst'],
    min_dispersion=0.01,
    max_speed=0.2,
)

# Feature objects for the combined Extractor. Column names, ``pk`` and ``aoi``
# are not passed to the individual features here; they are given once to the
# Extractor call at the end of this cell.
saccade_part = SaccadeFeatures(
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['tekst'],
)
fixation_part = FixationFeatures(
    features_stats=fix_feats_stats,
    shift_pk=['Participant'],
)
regression_part = RegressionFeatures(
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['tekst'],
    rule=(1, 3),
)
# NOTE(review): shift_pk, min_dispersion and max_speed differ from the
# stand-alone ``ms`` instance defined earlier in this cell -- confirm intended.
microsaccade_part = MicroSaccades(
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['Participant'],
    min_dispersion=0.001,
    max_speed=0.5,
)

features = [saccade_part, fixation_part, regression_part, microsaccade_part]

# One extractor running all features over the shared column mapping.
extractor = Extractor(
    features=features,
    x=x, y=y, t=t,
    duration=dur, dispersion=dis,
    aoi='AOI',
    pk=['Participant', 'tekst'],
    return_df=True,
)

In [6]:
# Run the stand-alone saccade extractor and display the resulting table.
saccade_table = sf.fit_transform(data)
saccade_table

Unnamed: 0,sac_length_min,sac_length_max,sac_length_max_shift,sac_acceleration_mean,sac_acceleration_mean_shift,sac_speed_mean,sac_speed_kurtosis
1_1,0.002657,0.433486,-0.008934,1.522857,0.316149,0.488956,5.498018
1_2,0.003757,0.321837,-0.127119,1.507806,0.322016,0.451773,8.356764
1_3,0.003663,0.365776,-0.207636,1.292694,0.083609,0.390753,7.639049
1_4,0.000212,0.342315,-0.362239,0.999054,-0.038003,0.333495,9.593384
1_5,0.002705,0.375434,-0.094030,1.187418,0.089377,0.360223,9.717641
...,...,...,...,...,...,...,...
2_33,0.001920,0.406550,0.000000,0.945929,-0.050745,0.264515,10.235914
2_34,0.007928,0.354386,-0.260736,0.883503,-0.158937,0.298396,9.732734
2_35,0.002170,1.269198,0.000000,2.527952,0.883969,0.623824,45.476927
2_36,0.002196,1.124204,0.000000,1.650884,0.266994,0.443600,10.414252


In [7]:
# Run the stand-alone fixation extractor and display the resulting table.
fixation_table = fx.fit_transform(data)
fixation_table

Unnamed: 0,fix_duration_aoi_2_sum,fix_vad_aoi_2_mean,fix_duration_aoi_1_sum,fix_vad_aoi_1_mean
1_1,10760.106500,1.084216,2504.5290,1.173406
1_2,9308.563500,1.099930,1652.0625,1.427237
1_3,9218.071000,1.219697,1645.0175,1.014305
1_4,7324.355999,1.125739,952.0735,1.285798
1_5,9060.284000,1.148466,1942.1885,1.344456
...,...,...,...,...
2_33,13896.132000,1.376585,2428.7540,1.332026
2_34,5525.225500,1.291868,891.6540,1.522308
2_35,9431.293500,1.414826,1157.4960,1.392368
2_36,5885.597000,1.333476,614.9980,1.361147


In [8]:
# Run the stand-alone regression extractor (rule=(1, 3)) and display the table.
regression_table = rg.fit_transform(data)
regression_table

Unnamed: 0,reg_length_aoi_2_min,reg_length_aoi_2_max,reg_acceleration_aoi_2_mean,reg_speed_aoi_2_mean,reg_speed_aoi_2_kurtosis,reg_length_aoi_1_min,reg_length_aoi_1_max,reg_acceleration_aoi_1_mean,reg_speed_aoi_1_mean,reg_speed_aoi_1_kurtosis
1_1,0.011841,0.420153,101.520275,3.480621,4.196115,0.037026,0.272665,54.131503,3.396826,-1.593181
1_2,0.006408,0.321837,70.596140,3.276942,-0.922803,0.030813,0.261686,35.216219,2.167320,-0.934722
1_3,0.005249,0.265067,73.916643,2.629076,1.171818,0.016539,0.254050,89.722992,3.206941,3.553356
1_4,0.009554,0.277172,107.220463,3.406318,-0.759758,0.041129,0.049056,57.301368,2.288800,
1_5,0.002705,0.300800,76.290851,2.548291,1.412943,0.042946,0.104575,57.119943,2.593522,0.004752
...,...,...,...,...,...,...,...,...,...,...
2_33,0.007210,0.202824,97.018484,2.316939,1.628924,0.025199,0.221629,148.173164,3.673611,-0.042142
2_34,0.013271,0.303437,161.015426,3.315178,0.574263,0.023785,0.114924,132.171743,3.094738,-1.646757
2_35,0.007504,1.269198,186.293178,5.238658,6.259332,0.003259,0.559806,54.424245,2.653471,-1.497979
2_36,0.009415,0.504216,132.919882,3.304351,9.638348,0.017941,1.124204,142.120547,6.391043,6.352378


In [9]:
# Run the stand-alone micro-saccade extractor and display the resulting table.
microsaccade_table = ms.fit_transform(data)
microsaccade_table

Unnamed: 0,microsac_length_aoi_2_min,microsac_length_aoi_2_max,microsac_acceleration_aoi_2_mean,microsac_speed_aoi_2_mean,microsac_speed_aoi_2_kurtosis,microsac_length_aoi_1_min,microsac_length_aoi_1_max,microsac_acceleration_aoi_1_mean,microsac_speed_aoi_1_mean,microsac_speed_aoi_1_kurtosis
1_1,0.011581,0.190765,1.167471,0.345077,1.952940,0.016593,0.198150,0.660408,0.287073,-0.298782
1_2,0.006408,0.176232,1.003533,0.318633,1.040025,0.014979,0.117818,1.040075,0.310151,0.457796
1_3,0.005065,0.185019,0.762113,0.245316,14.807231,0.003663,0.063193,0.744146,0.194864,1.458462
1_4,0.001980,0.110651,0.813152,0.241609,7.668473,0.041129,0.070823,1.077196,0.313787,-1.606570
1_5,0.002705,0.192296,0.942590,0.301805,3.149362,0.020778,0.104575,1.326198,0.359371,-0.409101
...,...,...,...,...,...,...,...,...,...,...
2_33,0.003962,0.181661,1.088828,0.270166,6.836550,0.016619,0.129722,0.902597,0.220889,-1.668084
2_34,0.011230,0.063878,0.750266,0.186453,3.302904,0.023785,0.114924,0.296183,0.161013,1.929328
2_35,0.007504,0.166901,1.533474,0.327634,5.700887,0.003259,0.180431,1.281691,0.399833,-1.697831
2_36,0.004319,0.154740,1.109342,0.287022,8.486159,0.017941,0.166901,0.951186,0.355342,-0.564187


In [10]:
# Run every feature through the combined Extractor and preview the first rows.
# NOTE(review): in the recorded output the sac_* and microsac_* columns hold
# identical values, and they also match the reg_* values of the ``rg_angles``
# run further down -- confirm the Extractor computes each feature separately.
combined_table = extractor.fit_transform(data)
combined_table.head()

Unnamed: 0,sac_length_aoi_2_min,sac_length_aoi_2_max,sac_acceleration_aoi_2_mean,sac_speed_aoi_2_mean,sac_speed_aoi_2_kurtosis,sac_length_aoi_1_min,sac_length_aoi_1_max,sac_acceleration_aoi_1_mean,sac_speed_aoi_1_mean,sac_speed_aoi_1_kurtosis,...,microsac_length_aoi_2_min,microsac_length_aoi_2_max,microsac_acceleration_aoi_2_mean,microsac_speed_aoi_2_mean,microsac_speed_aoi_2_kurtosis,microsac_length_aoi_1_min,microsac_length_aoi_1_max,microsac_acceleration_aoi_1_mean,microsac_speed_aoi_1_mean,microsac_speed_aoi_1_kurtosis
1_1,0.011581,0.420153,88.408114,3.316192,2.582125,0.016593,0.272665,68.252408,3.213086,-0.278472,...,0.011581,0.420153,88.408114,3.316192,2.582125,0.016593,0.272665,68.252408,3.213086,-0.278472
1_2,0.006408,0.321837,85.79371,3.007439,-0.464949,0.014979,0.261686,54.794634,2.10352,-0.316122,...,0.006408,0.321837,85.79371,3.007439,-0.464949,0.014979,0.261686,54.794634,2.10352,-0.316122
1_3,0.005065,0.365776,94.441713,2.959531,6.020307,0.003663,0.25405,76.291863,2.827255,0.763153,...,0.005065,0.365776,94.441713,2.959531,6.020307,0.003663,0.25405,76.291863,2.827255,0.763153
1_4,0.00198,0.277172,121.601653,3.005369,0.249804,0.041129,0.272092,53.823491,2.801384,2.951833,...,0.00198,0.277172,121.601653,3.005369,0.249804,0.041129,0.272092,53.823491,2.801384,2.951833
1_5,0.002705,0.3008,69.126721,2.426858,1.67784,0.020778,0.104575,68.72506,2.409873,0.137052,...,0.002705,0.3008,69.126721,2.426858,1.67784,0.020778,0.104575,68.72506,2.409873,0.137052


In [11]:
# Second regression extractor: same columns and statistics as ``rg``, but with
# rule=(90, 180) and an extra ``deviation`` argument.
# NOTE(review): presumably the rule values are angles and ``deviation`` is a
# tolerance around them -- TODO confirm against RegressionFeatures.
rg_angles = RegressionFeatures(
    x=x, y=y, t=t, duration=dur,
    pk=['Participant', 'tekst'],
    aoi='AOI',
    features_stats=sac_feats_stats,
    shift_features=sac_feats_stats_shift,
    shift_pk=['tekst'],
    rule=(90, 180),
    deviation=15,
)

In [12]:
# Run the rule=(90, 180) regression extractor and display the resulting table.
regression_angles_table = rg_angles.fit_transform(data)
regression_angles_table

Unnamed: 0,reg_length_aoi_2_min,reg_length_aoi_2_max,reg_acceleration_aoi_2_mean,reg_speed_aoi_2_mean,reg_speed_aoi_2_kurtosis,reg_length_aoi_1_min,reg_length_aoi_1_max,reg_acceleration_aoi_1_mean,reg_speed_aoi_1_mean,reg_speed_aoi_1_kurtosis
1_1,0.011581,0.420153,88.408114,3.316192,2.582125,0.016593,0.272665,68.252408,3.213086,-0.278472
1_2,0.006408,0.321837,85.793710,3.007439,-0.464949,0.014979,0.261686,54.794634,2.103520,-0.316122
1_3,0.005065,0.365776,94.441713,2.959531,6.020307,0.003663,0.254050,76.291863,2.827255,0.763153
1_4,0.001980,0.277172,121.601653,3.005369,0.249804,0.041129,0.272092,53.823491,2.801384,2.951833
1_5,0.002705,0.300800,69.126721,2.426858,1.677840,0.020778,0.104575,68.725060,2.409873,0.137052
...,...,...,...,...,...,...,...,...,...,...
2_33,0.003962,0.255299,118.610232,2.502032,-0.540786,0.016619,0.221629,146.104445,2.998909,1.133978
2_34,0.011230,0.310389,150.700497,3.062124,-0.755616,0.023785,0.114924,132.171743,3.094738,-1.646757
2_35,0.007504,1.269198,184.508856,4.672236,9.241733,0.003259,0.559806,64.591090,2.472077,-0.814155
2_36,0.004319,0.504216,144.844016,3.262612,7.188011,0.017941,1.124204,142.120547,6.391043,6.352378
