### Eye movements Feature extraction

The feature extraction methods  used by Crabb et al. (2014) is based on count of centralised seccades. The first step is to centralise the saccades, i.e., saccades starting position is translated to (0,0) in cartesian coordinate. Then the cartesian coordinte is divided into grides in such away that each gride has a 2 by 2 degrees size. The features are the count of saccades that land in each gride.

In [1]:
import numpy as np
from scipy import interp, stats, integrate
from sklearn import svm, datasets,metrics
import pandas as pd
import pickle,sys,glob,os
import seaborn as sns
from sklearn.decomposition import PCA,KernelPCA
from sklearn.preprocessing import MinMaxScaler

from sklearn.pipeline import Pipeline
from numpy import random
import random
np.random.seed(5); random.seed(5)

from tqdm import tqdm
import matplotlib.pyplot as plt


In [14]:

def Angle_between(p1, p2):
    """"
    Angle between the horizontal line and the saccade or
    angle between (0,0) and the end of the saccade
    """
    ang1 = np.arctan2(*p1[::-1])
    ang2 = np.arctan2(*p2[::-1])
    return np.rad2deg((ang1 - ang2) % (2 * np.pi))

def Build_2D_histogram(Amp, Ang):  
    """"
    compute 2D histogram of the saccades that land in each cartesian coordinate grid
    """ 
    
    Sacc_X = Amp * np.cos(Ang)
    Sacc_Y = Amp * np.sin(Ang)
  
    res = np.histogram2d(Sacc_X, Sacc_Y, bins=[BINSY,BINSX])

    return(res[0])


In [15]:
def Process_data_stat(Df_sacc,csv_file,video):
    """
    extract the saccades to compute the histogram
    """   

    Df_saccades = Df_sacc[1:]
    Df_saccades = Df_saccades[['metric','video','Stime','Sx','Sy','Ex','Ey','amp','vel']]        
    Df_saccades = Df_saccades.reset_index()        

    # convert time data to seconds
    Df_saccades['amp'] = Df_saccades['amp'].astype(float)
    Df_saccades['Stime'] = Df_saccades['Stime'].astype(float)
    Df_saccades['Stime'] = (Df_saccades['Stime'] - Df_saccades['Stime'][0])/1000    

    # exclude smaller saccades    
    Sx = (Df_saccades.Sx).astype(float)
    Sy = (Df_saccades.Sy).astype(float)            
    Ex = (Df_saccades.Ex).astype(float)
    Ey = (Df_saccades.Ey).astype(float)
            
    A = (Sx-Sx, Sy-Sy)
    B = (Ex-Sx, Ey-Sy)
    
    ang_ = Angle_between( B,A)
    
    Df_saccades['Ang'] = ang_             
    Df_saccades = Df_saccades.reset_index()    
                
    Res_data = Build_2D_histogram(Df_saccades.amp, Df_saccades.Ang) 
        
    return(Res_data)



In [42]:
# trials that are excluded based on the preprocesing
CV1 = ['C003','C023','C030']
CV2 = ['C027','C030']
CV3 = ['C026','C027']

GV1 = ['G010','G013','G024']
GV2 = ['G003','G024','G031', 'G033']
GV3 = ['G001','G026','G031','G036']

Cvid= []
Cvid.append(CV1)
Cvid.append(CV2)
Cvid.append(CV3)


Gvid= []
Gvid.append(GV1)
Gvid.append(GV2)
Gvid.append(GV3)


Excluded_trials = {'Control':Cvid,
'Glaucoma': Gvid}


In [45]:
path = '../Data'
Saving_path = '../Features_Based_ON_Paper/'

BINSX = np.array(range(-12,14,2))
BINSY = np.array(range(-10,12,2))

path = '../All_data' 

# path to th csv files
sub_foldres = os.listdir(path)
videos  = ['DadsArmy','HistoryBoys','SkiCross']

Exclude = []
for folder in sub_foldres:
    print(f'*********************************************')
    print(f'********** processing {folder} data *******')
    print(f'*********************************************')

    csvs = glob.glob(os.path.join(path,folder) + '/*.csv')

    Exclude_vids = Excluded_trials[folder]
    for csv in csvs:
        dataframe = pd.read_csv(csv,  usecols=[0,1,2,3,4,5,6,7,8,9,10,11], 
                                names = ['metric','video','Eye','Stime','Etime','Duration','Sx','Sy','Ex','Ey','amp','vel'], 
                                header = None) 
                            
        X = [];  Y = []
        
        for ii in range(len(videos)):
            Exclude_vid = Exclude_vids[ii]
            Df_saccades = dataframe[(dataframe['metric'] == 'Saccade') & (dataframe['video'] == videos[ii])]               

            if len(Df_saccades.amp) > 50 and csv[-8:-4] not in Exclude_vid:                  
                Features = Process_data_stat(Df_saccades,csv, videos[ii]) 

                # remove saccades smaller than 2 degrees as describe in the paper
                Features[4:6,5:7] = np.float("NaN")         
                Features = np.array(Features).flatten()

                x = Features[~np.isnan(Features)]/len(Df_saccades.amp)
                X.append(x)

            else:
                print(f"Significant saccdes missing in {csv[-8:-4]} {videos[ii]}")
                x = np.empty((116,))
                x[:] = np.NaN
                X.append(x)


        if folder =='Control':
            Y = [0,0,0]
        else:
            Y = [1,1,1]
        np.save(os.path.join(Saving_path, csv[-8:-4] + '.npy'), dict(X = X,Y = Y))



*********************************************
********** processing Control data *******
*********************************************
Significant saccdes missing in C003 DadsArmy
Significant saccdes missing in C023 DadsArmy
Significant saccdes missing in C026 SkiCross
Significant saccdes missing in C027 HistoryBoys
Significant saccdes missing in C027 SkiCross
Significant saccdes missing in C030 DadsArmy
Significant saccdes missing in C030 HistoryBoys
*********************************************
********** processing Glaucoma data *******
*********************************************
Significant saccdes missing in G001 SkiCross
Significant saccdes missing in G003 HistoryBoys
Significant saccdes missing in G010 DadsArmy
Significant saccdes missing in G010 HistoryBoys
Significant saccdes missing in G010 SkiCross
Significant saccdes missing in G013 DadsArmy
Significant saccdes missing in G024 DadsArmy
Significant saccdes missing in G024 HistoryBoys
Significant saccdes missing in G026 Ski

In [7]:
# acessing the processed features
re = np.load(os.path.join(Saving_path, 'G010.npy'),allow_pickle=True)
print(np.asarray(re.item().get('X')).shape)
print(np.asarray(re.item().get('Y')).shape)


(3, 116)
(3,)


In [11]:
a = np.array([1,1])
b= np.array([2,2])
np.linalg.norm(a-b)

1.4142135623730951

In [40]:

cc = ['C003', 'C023', 'C030']
'ABC' not in cc



True

In [8]:
np.asarray(re.item().get('X'))

array([[0.00344828, 0.00344828, 0.        , 0.        , 0.        ,
        0.00344828, 0.00344828, 0.00344828, 0.00689655, 0.01034483,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.00344828, 0.        ,
        0.00344828, 0.00344828, 0.        , 0.00344828, 0.        ,
        0.00344828, 0.00344828, 0.        , 0.        , 0.        ,
        0.00344828, 0.        , 0.        , 0.        , 0.00344828,
        0.        , 0.01034483, 0.00344828, 0.00689655, 0.00344828,
        0.00689655, 0.03448276, 0.        , 0.00344828, 0.00689655,
        0.00344828, 0.        , 0.01034483, 0.        , 0.00689655,
        0.00344828, 0.00689655, 0.00689655, 0.02068966, 0.00344828,
        0.00344828, 0.        , 0.        , 0.00344828, 0.00344828,
        0.00689655, 0.0137931 , 0.01034483, 0.00689655, 0.00344828,
        0.        , 0.00689655, 0.00344828, 0.00689655, 0.00344828,
        0.        , 0.00689655, 0.01034483, 0.02