# EMICS 2021 Analysis (ROC analysis, 60 Hz)

This R-notebook summarizes the statistical analyses performed to compare classification performances using their ROC curves

In [1]:
library(pROC)
library(jsonlite)

devices <- c('Eyelink', 'EyeSeeCam', 'EyeTribe')
classifiers <- c('LogR', 'kNN', 'SVM')

folder.in <- 'results/60Hz'
file.in <- 'predictions.csv'

pred <- read.csv(file.path(folder.in, file.in), sep='\t')

Type 'citation("pROC")' for a citation.


Attaching package: 'pROC'


The following objects are masked from 'package:stats':

    cov, smooth, var




## AUC statistics - slope method - Fig. 1 (right)

### AUC and 95% confidence intervals using all available responses

In [15]:
# Eye tracker comparison - 95% CI for AUC using the Stoll (2013) method
# Shown as the black markers in Fig. 1 (right)
pred.stoll <- pred[pred$model == 'Stoll2013',]

rocs.stoll <- list()
rocs.stoll.df <- data.frame();

for(tracker in devices) {

    mroc <- roc(pred.stoll$true_val[pred.stoll$device == tracker], 
                pred.stoll$slope[pred.stoll$device == tracker], 
                levels=c(1, 2), 
                dir='<')

    rocs.stoll[[tracker]] <- mroc
    ci.stoll <- as.numeric(ci.auc(mroc))
    rocs.stoll.df <- rbind.data.frame(rocs.stoll.df, c(ci.stoll))
}

rocs.stoll.df <- cbind.data.frame(devices, rocs.stoll.df)
colnames(rocs.stoll.df) <- c('device', 'ci.min', 'auc', 'ci.max')

# Save results (to be read by Python figure1() function)
write_json(rocs.stoll.df, file.path(folder.in, 'stoll_auc.json'))

# Print results to notebook
rocs.stoll.df

device,ci.min,auc,ci.max
<chr>,<dbl>,<dbl>,<dbl>
Eyelink,0.7204935,0.7716128,0.8227321
EyeSeeCam,0.8471234,0.8826885,0.9182536
EyeTribe,0.7085462,0.7593183,0.8100905


### Is performance similar between eye trackers when using the slope method?

In [3]:
# Eye tracker comparison - global ROCs
roc.EL_ES <- roc.test(rocs.stoll[['EyeSeeCam']], rocs.stoll[['Eyelink']])
print(roc.EL_ES)

roc.EL_ET <- roc.test(rocs.stoll[['Eyelink']], rocs.stoll[['EyeTribe']])
print(roc.EL_ET)

roc.ES_ET <- roc.test(rocs.stoll[['EyeSeeCam']], rocs.stoll[['EyeTribe']])
print(roc.ES_ET)

# Multiple comparisons correction
roc.p <- c(roc.EL_ES$p.value, roc.EL_ET$p.value, roc.ES_ET$p.value)
p.adjust(roc.p, method='holm')


	DeLong's test for two ROC curves

data:  rocs.stoll[["EyeSeeCam"]] and rocs.stoll[["Eyelink"]]
D = 3.4959, df = 597.75, p-value = 0.0005075
alternative hypothesis: true difference in AUC is not equal to 0
sample estimates:
AUC of roc1 AUC of roc2 
  0.8826885   0.7716128 


	DeLong's test for two ROC curves

data:  rocs.stoll[["Eyelink"]] and rocs.stoll[["EyeTribe"]]
D = 0.33445, df = 669.97, p-value = 0.7381
alternative hypothesis: true difference in AUC is not equal to 0
sample estimates:
AUC of roc1 AUC of roc2 
  0.7716128   0.7593183 


	DeLong's test for two ROC curves

data:  rocs.stoll[["EyeSeeCam"]] and rocs.stoll[["EyeTribe"]]
D = 3.9007, df = 599.96, p-value = 0.0001068
alternative hypothesis: true difference in AUC is not equal to 0
sample estimates:
AUC of roc1 AUC of roc2 
  0.8826885   0.7593183 



### How many participants are significantly above chance level?

In [4]:
# Eyelink: individual subjects
subEL <- data.frame()
for(pp in 1:24) {
    mroc <- roc(pred.stoll$true_val[pred.stoll$device == 'Eyelink' & pred.stoll$ppid == pp], 
              pred.stoll$slope[pred.stoll$device == 'Eyelink' & pred.stoll$ppid == pp],
              levels=c(1, 2), 
              dir='<')
    auc <- auc(mroc)
    aci <- ci.auc(mroc)
    subEL <- rbind(subEL, c(pp, aci[1], aci[2], aci[3]))
}
colnames(subEL) <- c('pp', 'ci.min', 'auc', 'ci.max')
subEL$abovechance <- 0
subEL$abovechance[subEL$ci.min > 0.5] <- 1


# EyeSeeCam: individual subjects
subES <- data.frame()
for(pp in 1:24) {
    mroc <- roc(pred.stoll$true_val[pred.stoll$device == 'EyeSeeCam' & pred.stoll$ppid == pp], 
              pred.stoll$slope[pred.stoll$device == 'EyeSeeCam' & pred.stoll$ppid == pp],
              levels=c(1, 2), 
              dir='<')
    auc <- auc(mroc)
    aci <- ci.auc(mroc)
    subES <- rbind(subES, c(pp, aci[1], aci[2], aci[3]))
}
colnames(subES) <- c('pp', 'ci.min', 'auc', 'ci.max')
subES$abovechance <- 0
subES$abovechance[subES$ci.min > 0.5] <- 1


# EyeTribe: individual subjects
subET <- data.frame()
for(pp in 1:24) {
    mroc <- roc(pred.stoll$true_val[pred.stoll$device == 'EyeTribe' & pred.stoll$ppid == pp], 
              pred.stoll$slope[pred.stoll$device == 'EyeTribe' & pred.stoll$ppid == pp],
              levels=c(1, 2), 
              dir='<')
    auc <- auc(mroc)
    aci <- ci.auc(mroc)
    subET <- rbind(subET, c(pp, aci[1], aci[2], aci[3]))
}
colnames(subET) <- c('pp', 'ci.min', 'auc', 'ci.max')
subET$abovechance <- 0
subET$abovechance[subET$ci.min > 0.5] <- 1

cat('Participants classified above chance level:\n')
cat(paste('Eyelink:\tAUC > chance: ', sum(subEL$auc > 0.5), ', CImin > chance: ', sum(subEL$abovechance), '\n', sep=''))
cat(paste('EyeSeeCam:\tAUC > chance: ', sum(subES$auc > 0.5), ', CImin > chance: ', sum(subES$abovechance), '\n', sep=''))
cat(paste('EyeTribe:\tAUC > chance: ', sum(subET$auc > 0.5), ', CImin > chance: ', sum(subET$abovechance), '\n', sep=''))

"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."
"ci.auc() of a ROC curve with AUC == 1 is always 1-1 and can be misleading."


Participants classified above chance level:
Eyelink:	AUC > chance: 23, CImin > chance: 12
EyeSeeCam:	AUC > chance: 23, CImin > chance: 19
EyeTribe:	AUC > chance: 21, CImin > chance: 13


## Trial-based classification

### Which models differ from each other and from the Stoll et al. (2013) models?

In [13]:
pred.tr <- pred[pred$model %in% classifiers & pred$trace == 'trials' ,]

# Eye tracker comparison per classifier, using 95% CI for AUC
rocs.trial <- list()
for(tracker in devices) {
  
    rocs.trial[[tracker]] <- list()
  
    for (model in classifiers) {
        mroc <- roc(pred.tr$true_val[pred.tr$device == tracker & pred.tr$model == model], 
                    pred.tr$pred_prob[pred.tr$device == tracker & pred.tr$model == model],
              levels=c(1, 2), 
              dir='<')
        rocs.trial[[tracker]][[model]] <- mroc
    }
}

# Comparison to slope method when using full 10s trace (DS/full in Fig. 2)
pred.stoll.all <- pred[pred$model == 'Stoll_all',]
rocs.stoll.all <- list()
for(tracker in devices) {
    mroc <- roc(pred.stoll.all$true_val[pred.stoll.all$device == tracker], 
                pred.stoll.all$slope[pred.stoll.all$device == tracker], 
                levels=c(1, 2), 
                dir='<')
    rocs.stoll.all[[tracker]] <- mroc
}

# Compare all models using roc.test
final.trials <- data.frame()
for(tracker in devices) {
    for (model1 in classifiers) {
        for (model2 in classifiers) {
            roc.cmp <- roc.test(rocs.trial[[tracker]][[model1]], rocs.trial[[tracker]][[model2]])
            final.trials <- rbind(final.trials, c(tracker, model1, model2, roc.cmp$p.value[1], roc.cmp$statistic[1], roc.cmp$roc1$auc[1], roc.cmp$roc2$auc[1]))
        }

        # Also compare against Stoll2013 model...
        roc.cmpDS <- roc.test(rocs.trial[[tracker]][[model1]], rocs.stoll[[tracker]])
        final.trials <- rbind(final.trials, c(tracker, model1, 'DS', roc.cmpDS$p.value[1], roc.cmpDS$statistic[1], roc.cmpDS$roc1$auc[1], roc.cmpDS$roc2$auc[1]))

        # ...and DS_full model
        roc.cmpDSf <- roc.test(rocs.trial[[tracker]][[model1]], rocs.stoll.all[[tracker]])
        final.trials <- rbind(final.trials, c(tracker, model1, 'DS_full', roc.cmpDSf$p.value[1], roc.cmpDSf$statistic[1], roc.cmpDSf$roc1$auc[1], roc.cmpDSf$roc2$auc[1]))

    }
}
colnames(final.trials) <- c('tracker', 'model1', 'model2', 'p', 'z', 'auc1', 'auc2')

# Multiple comparisons correction, within each tracker
for(tracker in c('Eyelink', 'EyeSeeCam', 'EyeTribe')) {
  final.trials$p.holm[final.trials$tracker == tracker] <- p.adjust(final.trials$p[final.trials$tracker == tracker], method='holm')
}
final.trials$different <- 0
final.trials$different[final.trials$p.holm < 0.05] <- 1
final.trials$significance <- ''
final.trials$significance[final.trials$p.holm < 0.05] <- '*'
final.trials$significance[final.trials$p.holm < 0.01] <- '**'
final.trials$significance[final.trials$p.holm < 0.001] <- '***'

# Drop self-comparisons
final.trials <- final.trials[final.trials$z > 0,]

# Save comparison results
write.table(final.trials, file.path(folder.in, 'cls_comp_trials.csv'), sep='\t', row.names=F)

# Show table of model comparisons
final.trials

Unnamed: 0_level_0,tracker,model1,model2,p,z,auc1,auc2,p.holm,different,significance
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>
4,Eyelink,LogR,DS,0.0009961981434368,3.29159824406422,0.851792800453515,0.771612811791383,0.007969585,1,**
5,Eyelink,LogR,DS_full,0.0001948876152706,3.72555284739497,0.851792800453515,0.789824263038549,0.001794213,1,**
6,Eyelink,kNN,LogR,0.0980185762717849,1.65453638784965,0.866514314058957,0.851792800453515,0.68613,0,
9,Eyelink,kNN,DS,0.0001794213221197,3.74635683609128,0.866514314058957,0.771612811791383,0.001794213,1,**
10,Eyelink,kNN,DS_full,7.79437658826864e-06,4.47075549916103,0.866514314058957,0.789824263038549,0.0001091213,1,***
11,Eyelink,SVM,LogR,5.74066532756558e-05,4.02322480435085,0.867116638321995,0.851792800453515,0.0006888798,1,***
12,Eyelink,SVM,kNN,0.941031952456505,0.0739728954468101,0.867116638321995,0.866514314058957,1.0,0,
14,Eyelink,SVM,DS,4.04922918686577e-05,4.10465259601937,0.867116638321995,0.771612811791383,0.0005263998,1,***
15,Eyelink,SVM,DS_full,2.76958118780598e-06,4.68720750412133,0.867116638321995,0.789824263038549,4.154372e-05,1,***
19,EyeSeeCam,LogR,DS,0.199261778592184,1.2836576215705,0.902954931972789,0.882688492063492,1.0,0,


## Response-interval-based classification

In [11]:
pred.ri <- pred[pred$model %in% classifiers & pred$trace == 'intervals' ,]

# Eye tracker comparison per classifier, using 95% CI for AUC
rocs.interval <- list()
for(tracker in devices) {
  
    rocs.interval[[tracker]] <- list()
  
    for (model in classifiers) {
        
        mroc <- roc(pred.ri$true_val[pred.ri$device == tracker & pred.ri$model == model], 
                    pred.ri$pred_prob[pred.ri$device == tracker & pred.ri$model == model],
                    levels=c(0, 1), 
                    dir='<')
        rocs.interval[[tracker]][[model]] <- mroc
    }
}

# Compare all models using roc.test
final.intervals <- data.frame()
for(tracker in devices) {
    for (model1 in classifiers) {
        for (model2 in classifiers) {
            roc.cmp <- roc.test(rocs.interval[[tracker]][[model1]], rocs.interval[[tracker]][[model2]])
            final.intervals <- rbind(final.intervals, c(tracker, model1, model2, roc.cmp$p.value[1], roc.cmp$statistic[1], roc.cmp$roc1$auc[1], roc.cmp$roc2$auc[1]))
        }
    }
}
colnames(final.intervals) <- c('tracker', 'model1', 'model2', 'p', 'z', 'auc1', 'auc2')

# Multiple comparisons correction, within each tracker
for(tracker in devices) {
  final.intervals$p.holm[final.intervals$tracker == tracker] <- p.adjust(final.intervals$p[final.intervals$tracker == tracker], method='holm')
}
final.intervals$different <- 0
final.intervals$different[final.intervals$p.holm < 0.05] <- 1
final.intervals$significance <- ''
final.intervals$significance[final.intervals$p.holm < 0.05] <- '*'
final.intervals$significance[final.intervals$p.holm < 0.01] <- '**'
final.intervals$significance[final.intervals$p.holm < 0.001] <- '***'

# Drop self-comparisons
final.intervals <- final.intervals[final.intervals$z > 0,]

# Save comparison results
write.table(final.intervals, file.path(folder.in, 'cls_comp_intervals.csv'), sep='\t', row.names=F)

# Show table of model comparisons
final.intervals

Unnamed: 0_level_0,tracker,model1,model2,p,z,auc1,auc2,p.holm,different,significance
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>
4,Eyelink,kNN,LogR,0.18209748860721,1.33432463125953,0.768543615362812,0.759460034013605,1.0,0,
6,Eyelink,kNN,SVM,0.796595696854183,0.257755387547498,0.768543615362812,0.766834077380952,1.0,0,
7,Eyelink,SVM,LogR,0.0013912825911465,3.19645348063139,0.766834077380952,0.759460034013605,0.01252154,1,*
13,EyeSeeCam,kNN,LogR,0.0001033069457594,3.88269068632325,0.826371173469388,0.805404974489796,0.0007231486,1,***
15,EyeSeeCam,kNN,SVM,0.750042460197601,0.318583377213816,0.826371173469388,0.824825503117914,1.0,0,
16,EyeSeeCam,SVM,LogR,2.96519664089958e-10,6.30058744624139,0.824825503117914,0.805404974489796,2.668677e-09,1,***
22,EyeTribe,kNN,LogR,0.0002203739564613,3.69443765198963,0.809709821428571,0.785395408163265,0.001983366,1,**
24,EyeTribe,kNN,SVM,0.0181703624464202,2.36212809803619,0.809709821428571,0.796573837868481,0.09085181,0,
25,EyeTribe,SVM,LogR,0.0002800510748757,3.63308650743231,0.796573837868481,0.785395408163265,0.001983366,1,**
