In [1]:
import os
from os.path import join

import numpy as np
import pandas as pd

In [2]:
# Relative location of the fixation CSV used throughout this notebook
# (one directory up from the notebook, inside "data").
DATA_PATH = os.path.join(
    "..",
    "data",
    "Dyslexia_1_ready_data_fixations.csv",
)

In [3]:
def _comma_decimal_to_float(col: pd.Series) -> pd.Series:
    """Return ``col`` as floats, accepting decimal-comma strings such as "0,5"."""
    if pd.api.types.is_numeric_dtype(col):
        # pandas already parsed the column; there is no text to clean up.
        return col.astype(float)
    # Go through ``str`` so that mixed object columns (strings and numbers)
    # are handled uniformly before the comma is swapped for a dot.
    as_text = col.astype(str).str.replace(",", ".", regex=False)
    return pd.to_numeric(as_text).astype(float)


def read_dataset(path: str = DATA_PATH, seed: "int | None" = 0):
    """Load the fixation CSV and reshape it into the frame used in this notebook.

    Steps performed:

    * ``Sentence_ID`` is copied to ``SentenceID``.
    * ``FIX_X`` / ``FIX_Y`` are parsed as floats (decimal commas accepted) and
      each is divided by its own column maximum, stored as ``x`` / ``y``.
    * ``FIX_DURATION`` is divided by 1000 and stored as ``duration``
      (presumably milliseconds -> seconds; confirm against the data source).
    * ``timestamp`` is the running sum of ``duration`` over the whole file in
      row order; it is not reset per subject or sentence.
    * ``AOI_1`` / ``AOI_2`` / ``AOI_3`` are filled with randomly drawn
      placeholder labels; they are not derived from the data.
    * The raw columns ``Word_Number``, ``IQ``, ``FIX_X``, ``FIX_Y``,
      ``FIX_DURATION``, ``Age`` and ``Sentence_ID`` are dropped.

    Parameters
    ----------
    path : str
        Location of the CSV file. It must contain an ``"Unnamed: 0"`` column,
        which is used as the index.
    seed : int or None, default 0
        Seed for the generator that draws the placeholder AOI columns. A fixed
        seed keeps the returned frame identical across kernel restarts. Pass
        ``None`` for a fresh, non-reproducible draw.

    Returns
    -------
    pandas.DataFrame
        The remaining original columns followed by ``SentenceID``, ``x``,
        ``y``, ``duration``, ``timestamp``, ``AOI_1``, ``AOI_2``, ``AOI_3``.
    """
    df = pd.read_csv(path, index_col="Unnamed: 0")

    df["SentenceID"] = df["Sentence_ID"]

    # Coordinates: parse, then scale by the column maximum.
    df["x"] = _comma_decimal_to_float(df["FIX_X"])
    df["y"] = _comma_decimal_to_float(df["FIX_Y"])
    df["x"] = df["x"] / df["x"].max()
    df["y"] = df["y"] / df["y"].max()

    df["duration"] = _comma_decimal_to_float(df["FIX_DURATION"]) / 1000
    df["timestamp"] = df["duration"].cumsum()

    # A local generator keeps the placeholder columns reproducible and leaves
    # the global numpy random state untouched.
    rng = np.random.default_rng(seed)
    n_rows = len(df)
    df["AOI_1"] = rng.choice([0, 1], size=n_rows)
    df["AOI_2"] = rng.choice([11, 22, 33], size=n_rows)
    df["AOI_3"] = rng.choice(["A", "B", "C"], size=n_rows)

    return df.drop(
        columns=[
            "Word_Number",
            "IQ",
            "FIX_X",
            "FIX_Y",
            "FIX_DURATION",
            "Age",
            "Sentence_ID",
        ]
    )

In [4]:
# Load the prepared fixation table and preview its first five rows.
df = read_dataset(path=DATA_PATH)
df.head(n=5)

Unnamed: 0,SubjectID,Group,Sex,SentenceID,x,y,duration,timestamp,AOI_1,AOI_2,AOI_3
0,nnr8,3,fem,27,0.101158,0.726088,0.345,0.345,1,22,A
1,nnr8,3,fem,27,0.102406,0.754083,0.221,0.566,0,11,A
2,nnr8,3,fem,27,0.126049,0.755455,0.204,0.77,1,33,C
3,nnr8,3,fem,27,0.142134,0.761356,0.289,1.059,1,11,B
4,nnr8,3,fem,27,0.109963,0.766433,0.6,1.659,1,22,A


### Individual Normalization

Extractor:

In [5]:
import eyefeatures.features.stats as eye_stats
from eyefeatures.features.extractor import Extractor

# Saccade features to compute, given as {feature: [statistics]}. The printed
# result has one column per pair: sac_length_min, sac_length_max and
# sac_acceleration_mean.
sac_feats_stats = dict(length=['min', 'max'], acceleration=['mean'])

# Feature/statistic subsets handed to IndividualNormalization in the next
# section as its `dependent_features` / `independent_features` arguments.
dependent_features = dict(length=['min'])
independent_features = dict(acceleration=['mean'])

# NOTE(review): the exact meaning of `shift_pk` is defined by eyefeatures;
# confirm against the library documentation.
sf = eye_stats.SaccadeFeatures(
    shift_pk=['SentenceID'],
    features_stats=sac_feats_stats,
)

extractor = Extractor(
    features=[sf],
    # Names of the coordinate / time columns in `df`.
    x='x', y='y', t='timestamp', duration='duration',
    # One output row per (SubjectID, SentenceID); the printed index looks like
    # "nnr26_4" and, with leave_pk=True, both key columns appear in the result.
    pk=['SubjectID', 'SentenceID'],
    leave_pk=True,
    # Carry `Sex` through, taking the first value passed to the aggregator.
    extra=['Sex'],
    aggr_extra=lambda x: x.iloc[0],
    return_df=True,
)

extractor.fit_transform(df).head()

100%|██████████| 1/1 [00:00<00:00,  3.23it/s]


Unnamed: 0,sac_length_min,sac_length_max,sac_acceleration_mean,SubjectID,SentenceID,Sex
nnr26_4,0.019488,0.092498,0.509739,nnr26,4,masc
nnr26_5,0.013345,0.080962,0.45901,nnr26,5,masc
nnr26_6,0.018656,0.112882,0.57846,nnr26,6,masc
nnr26_7,0.015868,0.118729,0.386805,nnr26,7,masc
nnr26_8,0.023546,0.090291,0.434029,nnr26,8,masc


Extractor with individual normalization:

In [6]:
from eyefeatures.features.shift import IndividualNormalization

# Same extractor configuration as in the previous cell, rebuilt here.
extractor = Extractor(
    features=[sf],
    x='x', y='y', t='timestamp', duration='duration',
    pk=['SubjectID', 'SentenceID'],
    leave_pk=True,
    extra=['Sex'],
    aggr_extra=lambda x: x.iloc[0],
    return_df=True,
)

# Normalizer applied to the extracted feature table. In the printed result it
# adds sac_length_min_norm and sac_acceleration_mean_norm next to the original
# columns (inplace=False).
# NOTE(review): how dependent and independent features are treated differently
# is defined by eyefeatures; confirm against the library documentation.
ind_norm = IndividualNormalization(
    pk=['SubjectID', 'Sex'],
    dependent_features=dependent_features,
    independent_features=independent_features,
    inplace=False,
)

# Step 1: extract per-(SubjectID, SentenceID) features.
features = extractor.fit_transform(df)
# Step 2: append the normalized columns.
features_normed = ind_norm.fit_transform(features)
features_normed

100%|██████████| 1/1 [00:00<00:00,  3.42it/s]


Unnamed: 0,sac_length_min,sac_length_max,sac_acceleration_mean,SubjectID,SentenceID,Sex,sac_length_min_norm,sac_acceleration_mean_norm
nnr26_4,0.019488,0.092498,0.509739,nnr26,4,masc,0.983311,-0.135582
nnr26_5,0.013345,0.080962,0.459010,nnr26,5,masc,-0.443048,-0.239855
nnr26_6,0.018656,0.112882,0.578460,nnr26,6,masc,0.790175,0.005672
nnr26_7,0.015868,0.118729,0.386805,nnr26,7,masc,0.142752,-0.388273
nnr26_8,0.023546,0.090291,0.434029,nnr26,8,masc,1.925467,-0.291204
...,...,...,...,...,...,...,...,...
nnr85_29,0.014600,0.140295,0.595447,nnr85,29,fem,-0.930002,-0.474158
nnr85_30,0.014928,0.417827,1.273437,nnr85,30,fem,-0.898417,0.279562
nnr85_31,0.034824,0.161574,0.607337,nnr85,31,fem,1.017064,-0.460940
nnr85_32,0.013381,0.140145,0.725873,nnr85,32,fem,-1.047341,-0.329164
