# Data used in Andersson et al. (2017)
Contains data from 3 different conditions: 
- images
- dots
- videos


In [1]:
from numpy import *
import os
import scipy.io as io
import matplotlib.pyplot as plt
from math import atan2, degrees
import uneye
from matplotlib.pyplot import *
from sklearn.metrics import cohen_kappa_score as ck
import pandas as pd

In [2]:
# function for data loading
def load_andersson(filename,events_in,events_out):
    """Load one labelled trial from a .mat file and recode its event labels.

    The file must contain an 'ETdata' struct whose fields are read by position:
    field 0 is the sample matrix (column 3 = x in pixels, column 4 = y in
    pixels, column 5 = event label), field 1 the screen dimensions, field 2
    the screen resolution in pixels and field 3 the viewing distance.
    Screen dimensions and viewing distance must share the same length unit,
    because only their ratio is used.

    Parameters
    ----------
    filename : str
        Path of the .mat file.
    events_in : sequence
        Label codes as stored in the file.
    events_out : sequence
        Replacement codes, paired with `events_in` by position.

    Returns
    -------
    Xdeg, Ydeg : ndarray
        Horizontal / vertical gaze position, pixel values scaled to degrees.
    labels : ndarray
        Recoded labels (a new array; the loaded data is not modified).
        Codes that do not occur in `events_in` are passed through unchanged.
    """
    record = io.loadmat(filename)['ETdata'][0][0]
    samples = record[0]
    screenDim = record[1][0]
    screenRes = record[2][0]
    # .item() turns the 1-element array into a plain scalar, so math.atan2
    # does not have to rely on implicit array-to-float conversion
    viewDist = record[3][0].item()
    Xpix = samples[:,3]
    Ypix = samples[:,4]
    raw_labels = samples[:,5]

    # Calculate the number of degrees that correspond to a single pixel. This will
    # generally be a very small value, something like 0.03.
    # (half the screen's visual angle divided by half its resolution)
    x_degperpixel = degrees(atan2(.5*screenDim[0], viewDist)) / (.5*screenRes[0])
    y_degperpixel = degrees(atan2(.5*screenDim[1], viewDist)) / (.5*screenRes[1])
    # Calculate the eye position in degrees
    Xdeg = Xpix * x_degperpixel
    Ydeg = Ypix * y_degperpixel

    # recode labels: always compare against the untouched raw labels, so that
    # a freshly written code can never be picked up by a later pair
    # (e.g. 1->2 followed by 2->3 must not turn every 1 into a 3)
    labels = raw_labels.copy()
    for ev_in,ev_out in zip(events_in,events_out):
        labels[raw_labels==ev_in] = ev_out

    return Xdeg,Ydeg,labels

In [3]:
# Train / test split (see Andersson et al. (2017), Table 11):
#   - training: trials from images, dots and videos labelled by coder RA only
#   - testing:  trials labelled by both coders, RA and MN
# Inside each of the folders "dots", "images" and "videos" we created two
# subfolders to keep the two sets apart:
# 'coder RA' holds the training trials, 'both coders' holds the test trials.
folders = ['dots', 'images', 'videos']
parentpath = '../data/Andersson/data'

# coder RA training

In [4]:
# event labels
fixation = 0
saccade = 1
pso = 2
pursuit = fixation  # pursuit samples are given the fixation code for training
blink = 4
other = 5

# match events (recode original labels for network -> start at 0 with fixation)
events_in = [1,2,3,4,5,6]
events_out = [fixation,saccade,pso,pursuit,blink,other]

# read data

Ltrials = [] #Labels
Xtrials = [] # X position
Ytrials = [] # Y position
for folder in folders:

    path = os.path.join(parentpath,folder,'coder RA')

    # os.listdir returns files in arbitrary order; sort so that the order of
    # the training trials is the same on every machine and every run
    for fname in sorted(os.listdir(path)):
        if not fname.endswith('RA.mat'):
            continue
        filename = os.path.join(path,fname)
        Xdeg,Ydeg,labels = load_andersson(filename,events_in,events_out)

        print(filename)

        Ltrials.append(labels)
        Xtrials.append(Xdeg)
        Ytrials.append(Ydeg)

# for training, data has to be a matrix. Cut trials to length of 2000bins.
# append last value to shorter trials and label those as 'other'
min_dur = 2000
L = []
X = []
Y = []
for l_trial,x_trial,y_trial in zip(Ltrials,Xtrials,Ytrials):
    # step through the trial in windows of min_dur samples; a trial whose
    # length is an exact multiple of min_dur yields no empty trailing window
    # (an empty window has no last value to pad with)
    for start in range(0,len(l_trial),min_dur):
        l = l_trial[start:start+min_dur]
        x = x_trial[start:start+min_dur]
        y = y_trial[start:start+min_dur]

        n_time = min_dur - len(l)
        if n_time>0:
            # last, shorter window: concatenate end value to fill up to min_dur
            l = concatenate([l,other*ones(n_time)]) #add 'other'
            x = concatenate([x,x[-1]*ones(n_time)])
            y = concatenate([y,y[-1]*ones(n_time)])

        L.append(l)
        X.append(x)
        Y.append(y)

L = array(L)
X = array(X)
Y = array(Y)
print(L.shape)


../data/Andersson/data/dots/coder RA/TH34_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TH50_trial1_labelled_RA.mat
../data/Andersson/data/dots/coder RA/UL25_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/UH33_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/UL29_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TL24_trial1_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TL32_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/UL47_trial1_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TH38_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TL32_trial1_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TL44_trial1_labelled_RA.mat
../data/Andersson/data/dots/coder RA/TH36_trial17_labelled_RA.mat
../data/Andersson/data/dots/coder RA/UH31_trial17_labelled_RA.mat
../data/Andersson/data/images/coder RA/TH50_img_vy_labelled_RA.mat
../data/Andersson/data/images/coder RA/TL44_img_konijntjes_labelled_RA.mat
../da

# Train network

In [15]:
######## TRAINING ##########

# train 20 independent networks; every run gets its own weights name and
# its own random seed (the run index serves as both)

repeats = 20
sampfreq = 500
classes = len(unique(events_out))  # number of distinct recoded labels

for seed in range(repeats):
    weights_name = 'Andersson/Andersson_RA_%s' % seed

    model = uneye.DNN(weights_name=weights_name,
                      classes=classes,
                      sampfreq=sampfreq)
    model.train(X,Y,L,seed=seed)

Early stopping at epoch 110 because overfitting was detected on validation set
Model parameters saved to ./training/Andersson_RA_10


# Test data (both coders)

In [60]:
repeats = 20
sampfreq = 500

################
# event labels #
################

fixation = 0
saccade = 1
pso = 2
# NOTE(review): the training cell uses pursuit = fixation, whereas here pursuit
# samples keep their own code. A network trained without a separate pursuit
# code is then scored against coder labels that have one, which would lower
# the fixation kappa for conditions containing pursuit - TODO confirm intended.
pursuit = 3
blink = 4
other = 5

# match events (recode original labels for network -> start at 0 with fixation)
events_in = [1,2,3,4,5,6]
events_out = [fixation,saccade,pso,pursuit,blink,other]
# hard-coded rather than derived from events_out (which has 6 distinct codes
# here); presumably has to equal the value the networks were trained with
classes = 5

##################################################################
# read labelled data from both coders and get network prediction #
##################################################################

parentpath = '../data/Andersson/data'
folders = ['dots','images','videos']

# one record per trial; the DataFrame is built once at the end because
# DataFrame.append no longer exists in pandas >= 2.0 (and copied the whole
# frame on every call in older versions)
trial_rows = []
for folder in folders:

    path = os.path.join(parentpath,folder,'both coders')
    for fname in sorted(os.listdir(path)):
        if not fname.endswith('RA.mat'):
            continue
        # labels from coder RA
        filename = os.path.join(path,fname)
        Xdeg,Ydeg,labels_RA = load_andersson(filename,events_in,events_out)
        # same file, labels from coder MN (only the coder suffix differs)
        filename = os.path.join(path,fname[:-6]+'MN.mat')
        Xdeg,Ydeg,labels_MN = load_andersson(filename,events_in,events_out)

        # get network prediction: one column per trained network
        Pred = zeros((len(Xdeg),repeats))
        for rep in range(repeats):
            weights_name = 'Andersson/Andersson_RA_%s' %rep
            model = uneye.DNN(weights_name=weights_name,classes=classes,sampfreq=sampfreq)
            pred,_ = model.predict(Xdeg,Ydeg)
            Pred[:,rep] = pred

        trial_rows.append({'condition':folder,
                           'coder_RA':labels_RA,
                           'coder_MN':labels_MN,
                           'X':Xdeg,
                           'Y':Ydeg,
                           'prediction':Pred})

df = pd.DataFrame(trial_rows,columns=['condition','coder_RA','coder_MN','X','Y','prediction'])

########################
# evaluate performance #
########################

kappa_columns = ['fix','sacc','pso','blink']
kappa_events = [fixation,saccade,pso,blink]


def event_kappas(labels_a,labels_b):
    """Cohen's kappa between two label sequences, separately for each event
    type, each scored as 'this event' vs. 'any other label'.

    Returns a dict keyed by the names in `kappa_columns`.
    """
    return {col: ck((labels_a==ev).astype(float),(labels_b==ev).astype(float))
            for col,ev in zip(kappa_columns,kappa_events)}


def evaluate_trials(trials,condition):
    """Build the performance rows for one set of trials.

    trials    : rows of `df` to pool (all their samples are concatenated)
    condition : value written to the 'condition' column of every row

    Returns a list of dicts: for every network one row 'uneye_RA' and one row
    'uneye_MN', plus a single 'RA_MN' row (stored under network 0) for the
    agreement between the two coders.
    """
    l_RA = concatenate(list(trials['coder_RA']))
    l_MN = concatenate(list(trials['coder_MN']))
    preds = concatenate(list(trials['prediction']),axis=0) # samples x networks

    # exclude label 'others' for calculation of Cohen's Kappa
    keep_ind = (l_RA<other) & (l_MN<other)
    l_RA = l_RA[keep_ind]
    l_MN = l_MN[keep_ind]
    preds = preds[keep_ind]

    rows = []
    for rep in range(preds.shape[1]):
        l_uneye = preds[:,rep]
        # uneye vs. coder RA
        rows.append(dict(condition=condition,comparison='uneye_RA',network=rep,
                         **event_kappas(l_RA,l_uneye)))
        # uneye vs. coder MN
        rows.append(dict(condition=condition,comparison='uneye_MN',network=rep,
                         **event_kappas(l_MN,l_uneye)))
        if rep==0:
            # coder RA vs. coder MN (does not depend on the network, so only once)
            rows.append(dict(condition=condition,comparison='RA_MN',network=rep,
                             **event_kappas(l_MN,l_RA)))
    return rows


perf_rows = []

################
# by condition #
################
for f in folders:
    perf_rows.extend(evaluate_trials(df[df['condition']==f],f))

#######################
# overall performance #
#######################
perf_rows.extend(evaluate_trials(df,'all'))

df_perf = pd.DataFrame(perf_rows,columns=['condition','comparison','network']+kappa_columns)

# median over networks; select the kappa columns explicitly so the table does
# not depend on how pandas treats the remaining columns
df_perf.groupby(['comparison','condition'])[kappa_columns].median()


Unnamed: 0_level_0,Unnamed: 1_level_0,fix,sacc,pso,blink
comparison,condition,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RA_MN,all,0.815066,0.894974,0.73079,0.908152
RA_MN,dots,0.612198,0.797525,0.586451,0.770851
RA_MN,images,0.839674,0.90736,0.761809,0.924931
RA_MN,videos,0.65254,0.879079,0.64547,0.818389
uneye_MN,all,0.395558,0.885799,0.718562,0.848901
uneye_MN,dots,0.013309,0.799211,0.588773,0.64169
uneye_MN,images,0.848648,0.884503,0.723343,0.853643
uneye_MN,videos,0.13436,0.908756,0.726412,0.879858
uneye_RA,all,0.341078,0.874398,0.689181,0.815081
uneye_RA,dots,0.015554,0.782686,0.591038,0.784537


In [65]:
# average performance compared to both coders:
# 1. drop the coder-vs-coder rows ('RA_MN'), keeping only network-vs-coder rows
# 2. take the median over networks for every (comparison, condition) pair
# 3. average the two coder comparisons within each condition
# The kappa columns are selected explicitly: recent pandas versions no longer
# silently drop columns that cannot be aggregated, and the 'network' index is
# not a score and should not appear in the table.
score_columns = ['fix','sacc','pso','blink']
network_rows = df_perf[df_perf['comparison']!='RA_MN']
df_grouped = network_rows.groupby(['comparison','condition'])[score_columns].median()
df_grouped.groupby('condition').mean()

Unnamed: 0_level_0,fix,sacc,pso,blink
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
all,0.368318,0.880098,0.703872,0.831991
dots,0.014431,0.790948,0.589906,0.713113
images,0.805598,0.885434,0.717672,0.83503
videos,0.108736,0.88501,0.675472,0.838304
