In [None]:
from __future__ import annotations

import argparse
import os
import random

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from scipy.stats import ttest_ind, ttest_1samp

from sp_eyegan.gpu_selection import select_gpu
from sp_eyegan.load_sb_sat_data import get_sb_sat_data
from sp_eyegan.model import contrastive_learner
import config as config

# NOTE(review): %pylab star-imports numpy and matplotlib.pylab into the notebook
# namespace, which can shadow names imported above (IPython typically warns that
# e.g. `random` was clobbered). Consider `%matplotlib inline` instead -- confirm
# first that no later cell relies on the bare pylab names.
%pylab inline
# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
# Read the results table from the CSV location configured in `config`.
# Columns used further down: `model_name` and `<problem>_auc_fold_<k>`.
result_path = config.CSV_RESULTS_FILE
result_pd = pd.read_csv(filepath_or_buffer=result_path)

# Preview the first rows as the cell output.
result_pd.head(n=5)

Unnamed: 0,model_name,native_auc_fold_0,native_five_sec_eval_auc_fold_0,native_mean_auc,native_five_sec_eval_mean_auc,native_auc_fold_1,native_five_sec_eval_auc_fold_1,native_auc_fold_2,native_five_sec_eval_auc_fold_2,native_auc_fold_3,...,subj_acc_mean_auc,subj_acc_five_sec_eval_mean_auc,subj_acc_auc_fold_1,subj_acc_five_sec_eval_auc_fold_1,subj_acc_auc_fold_2,subj_acc_five_sec_eval_auc_fold_2,subj_acc_auc_fold_3,subj_acc_five_sec_eval_auc_fold_3,subj_acc_auc_fold_4,subj_acc_five_sec_eval_auc_fold_4
0,clrgaze_random_window_size_5000_sd_0.1_sd_fact...,0.772482,0.723738,0.704134,0.642978,0.744756,0.681491,0.696987,0.599407,0.811535,...,0.577182,0.562062,0.65312,0.631453,0.509897,0.513688,0.615912,0.593768,0.470133,0.477999
1,clrgaze_random_window_size_5000_sd_0.1_sd_fact...,0.379544,0.417033,0.435388,0.451921,0.369949,0.435426,0.429877,0.451161,0.419577,...,0.521517,0.516906,0.645077,0.570758,0.719624,0.633195,0.416424,0.470682,0.43329,0.451569
2,clrgaze_random_window_size_5000_sd_0.1_sd_fact...,0.715543,0.666576,0.577078,0.563607,0.592606,0.588508,0.582257,0.553661,0.577918,...,0.507073,0.498105,0.567882,0.56222,0.500525,0.531324,0.683008,0.608622,0.38367,0.395832
3,ekyt_random_window_size_5000_sd_0.1_sd_factor_...,0.811293,0.421786,0.720511,0.415439,0.769759,0.33323,0.698297,0.57541,0.856138,...,0.622338,0.440753,0.569575,0.403967,0.633327,0.475842,0.718183,0.270929,0.531528,0.632889
4,ekyt_random_window_size_5000_sd_0.1_sd_factor_...,0.604411,0.548971,0.566252,0.555862,0.600338,0.586278,0.596637,0.598212,0.565429,...,0.514284,0.522177,0.447327,0.490419,0.478426,0.489635,0.518834,0.534946,0.517061,0.539234


In [3]:
# Column prefix of each problem in the results table, paired with the label
# that is printed for it. Kept as pairs so keys and labels cannot drift apart.
_problem_labels = [
    ('native', 'Native Reader'),
    ('difficulty', 'Text Difficulty'),
    ('acc', 'Text Comprehension'),
    ('subj_acc', 'General Reading Comprehension'),
]
problems = [prefix for prefix, _ in _problem_labels]
names = [label for _, label in _problem_labels]

# First '_'-separated token of a raw model name -> architecture display name.
name_mapping = dict(
    clrgaze='CLRGaze',
    ekyt='EKYT',
)

In [4]:
# Print, for every problem, one LaTeX-style table row per model:
#     <model> & <mean AUC> $\pm$ <standard error><markers>
# Markers: '*'        -> mean fold AUC significantly greater than `popmean`
#                        (one-sample t-test, one-sided)
#          '$\dagger$' -> fold AUCs significantly different from the matching
#                        baseline row ('EKYT' / 'CLRGaze'; two-sample t-test,
#                        two-sided, so the marker alone does not say "better").
folds = 5                  # number of per-fold AUC columns read for each problem
flag_skip_scratch = True   # if True, rows whose model name contains '_scratch' are left out
popmean = 0.5              # reference AUC for the one-sample test (random guessing)
significance_level = 0.05  # p-value threshold used for both markers
# Middle part of the fold column names: '<problem><tag>_fold_<k>'.
# '_five_sec_eval_auc' selects the '<problem>_five_sec_eval_auc_fold_<k>' columns instead.
auc_column_tag = '_auc'

# Raw strings: '\p' and '\d' are not valid escape sequences in ordinary
# string literals and trigger warnings on recent Python versions.
PM_STR = r' $\pm$ '
DAGGER_STR = r'$\dagger$'
# Labels of the rows that the '(CP)' variants are compared against.
BASELINE_NAMES = ('EKYT', 'CLRGaze')


def _display_name(raw_name):
    """Map a raw ``model_name`` entry to its table label; ``None`` means skip the row."""
    if raw_name[0].isupper():
        # Already a display label (e.g. 'EKYT', 'CLRGaze'): keep as is.
        return raw_name
    if '_scratch' in raw_name:
        if flag_skip_scratch:
            return None
        return raw_name.split('_scratch')[0]
    suffix = ' (CP) random forest' if 'rf' in raw_name else ' (CP) fine-tuning'
    return name_mapping[raw_name.split('_')[0]] + suffix


def _baseline_name(display_name):
    """Return the baseline label a row is compared with, or ``None`` for baselines/unknown rows."""
    if display_name in BASELINE_NAMES:
        return None
    lowered = display_name.lower()
    for baseline in BASELINE_NAMES:
        if lowered.startswith(baseline.lower()):
            return baseline
    return None


for cur_prob, cur_name in zip(problems, names):
    model_names = list(result_pd['model_name'])
    fold_cols = [cur_prob + auc_column_tag + '_fold_' + str(fold_i) for fold_i in range(folds)]
    # One row per model, one column per fold.
    fold_values = result_pd[fold_cols].to_numpy()
    print('problem: ' + str(cur_name))

    # label -> (rounded mean AUC, per-fold AUCs, raw model name); when several
    # rows share a label, the one with the highest rounded mean AUC is kept.
    name_values_dict = dict()
    for j, raw_name in enumerate(model_names):
        cur_model_name = _display_name(raw_name)
        if cur_model_name is None:
            continue
        cur_mean = np.round(np.mean(fold_values[j, :]), decimals=3)
        best_so_far = name_values_dict.get(cur_model_name)
        if best_so_far is None or cur_mean > best_so_far[0]:
            name_values_dict[cur_model_name] = (cur_mean, fold_values[j, :], raw_name)

    for model_name, (_, model_fold_values, _) in name_values_dict.items():
        cur_mean = np.round(np.mean(model_fold_values), decimals=3)
        # NOTE: np.std defaults to ddof=0 (population std); kept unchanged so
        # the reported numbers stay the same.
        cur_stderr = np.round(np.std(model_fold_values) / np.sqrt(len(model_fold_values)), decimals=3)

        # Test whether the model is better than random guessing.
        better_random_pvalue = ttest_1samp(a=model_fold_values, popmean=popmean,
                                           alternative='greater').pvalue
        cur_p_value_add_str = '*' if better_random_pvalue < significance_level else ''

        # Test whether the model differs from its baseline. Baseline rows and
        # rows without a matching baseline in the table are not tested at all;
        # previously they were compared against whatever `base_values` an
        # earlier iteration had left behind (or raised NameError).
        baseline = _baseline_name(model_name)
        if baseline is not None and baseline in name_values_dict:
            base_values = name_values_dict[baseline][1]
            tt_test_pvalue = ttest_ind(base_values, model_fold_values,
                                       alternative='two-sided').pvalue
            if tt_test_pvalue < significance_level:
                cur_p_value_add_str += DAGGER_STR

        print('    ' + model_name + ' & ' + str(cur_mean) +
              PM_STR + str(cur_stderr) + cur_p_value_add_str)

problem: Native Reader
    CLRGaze (CP) fine-tuning & 0.704 $\pm$ 0.05*$\dagger$
    CLRGaze (CP) random forest & 0.577 $\pm$ 0.042
    EKYT (CP) fine-tuning & 0.721 $\pm$ 0.061*$\dagger$
    EKYT (CP) random forest & 0.556 $\pm$ 0.057
    EKYT & 0.55 $\pm$ 0.014*
    CLRGaze & 0.528 $\pm$ 0.046
problem: Text Difficulty
    CLRGaze (CP) fine-tuning & 0.566 $\pm$ 0.018*
    CLRGaze (CP) random forest & 0.488 $\pm$ 0.016
    EKYT (CP) fine-tuning & 0.545 $\pm$ 0.006*
    EKYT (CP) random forest & 0.501 $\pm$ 0.012
    EKYT & 0.494 $\pm$ 0.021
    CLRGaze & 0.516 $\pm$ 0.034
problem: Text Comprehension
    CLRGaze (CP) fine-tuning & 0.592 $\pm$ 0.032*
    CLRGaze (CP) random forest & 0.562 $\pm$ 0.026*
    EKYT (CP) fine-tuning & 0.574 $\pm$ 0.024*
    EKYT (CP) random forest & 0.597 $\pm$ 0.026*
    EKYT & 0.566 $\pm$ 0.02*
    CLRGaze & 0.56 $\pm$ 0.055
problem: General Reading Comprehension
    CLRGaze (CP) fine-tuning & 0.577 $\pm$ 0.033
    CLRGaze (CP) random forest & 0.507 $\pm$ 0.