# Eyettention

In [None]:
import os
import numpy as np
import model
import torch
from torch.utils import model_zoo
import pandas as pd
from utils import *
from sklearn.model_selection import StratifiedKFold, KFold
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam, RMSprop
from transformers import BertTokenizerFast
from model import Eyettention_readerID
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from torch.nn.functional import cross_entropy, softmax
from collections import deque
import pickle
import json
import matplotlib.pyplot as plt
import argparse
import random
from scasim import *
from transformers import BertTokenizer

In [None]:
# Notebook-level settings used by the cells below.

# Folder (relative to the working directory) where checkpoints and result
# pickles are written by the training cell.
save_data_folder = "./drive/MyDrive/results/BSC/Eyettention_Reader/emb_size_64"

# Toggles scanpath generation and the Scasim metrics during evaluation.
scanpath_gen_flag = True

# NOTE(review): `atten_type` and `DEVICE` are not referenced by the visible
# training cell (it builds its own `device` and reads cf["atten_type"]);
# kept for any other cell that may rely on them.
atten_type = "local_g"
#DEVICE = 'cuda'
DEVICE = 'cpu'

# Presumably silences the Hugging Face tokenizers parallelism warning when
# DataLoader workers fork -- TODO confirm.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): save_data_folder is the relative path "./drive/MyDrive/...",
# so it presumably resolves under this mount only when the working directory
# is /content -- confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Training loop**

In [None]:

if __name__ == '__main__':
	# Cross-validated training and evaluation of Eyettention_readerID on the
	# BSC corpus using a "new sentences" split: sentences are partitioned
	# with KFold while every reader appears in train, validation and test.
	# For each fold the script
	#   1. trains with early stopping on a validation split of the training
	#      sentences and checkpoints the best model,
	#   2. reloads that checkpoint and evaluates it on the held-out sentences
	#      (log-likelihood, duration / landing-position MSE and, when
	#      `scanpath_gen_flag` is set, Scasim scores of generated scanpaths),
	#   3. pickles the collected metrics into `save_data_folder`.

	def _standard_error(values):
		"""Return the standard error of the mean of the 1-D array *values*."""
		return np.std(values) / np.sqrt(len(values))

	gpu = 0

	torch.set_default_tensor_type('torch.FloatTensor')
	availbl = torch.cuda.is_available()
	if availbl:
		device = f'cuda:{gpu}'
	else:
		device = 'cpu'
	#torch.cuda.set_device(gpu)

	cf = {"model_pretrained": "bert-base-chinese",
			"lr": 1e-3,
			"max_grad_norm": 10,
			"n_epochs": 150,  # 1000
			"n_folds": 5,
			"dataset": 'BSC',
			"atten_type": 'local-g',
			"subid_emb_size": 64, # 32, 64
			"batch_size": 256,
			"max_sn_len": 27, #include start token and end token
			"max_sp_len": 40, #include start token and end token
			"norm_type": "z-score",
			"earlystop_patience": 20,
			"max_pred_len": 60
			}

	# Checkpoints and result pickles are written here; create the folder up
	# front so the first torch.save does not fail on a missing directory.
	os.makedirs(save_data_folder, exist_ok=True)

	#Encode the label into integer categories, setting the exclusive category 'cf["max_sn_len"]-1' as the end sign
	le = LabelEncoder()
	le.fit(np.append(np.arange(-cf["max_sn_len"]+3, cf["max_sn_len"]-1), cf["max_sn_len"]-1))
	#le.classes_

	#load corpus
	word_info_df, pos_info_df, eyemovement_df = load_corpus(cf["dataset"])
	#Make list with sentence index
	sn_list = np.unique(eyemovement_df.sn.values).tolist()
	#Make list with reader index
	reader_list = np.unique(eyemovement_df.id.values).tolist()

	print('Start evaluating on new sentences.')
	split_list = sn_list

	# Element-wise loss modules, built once and shared by the training and
	# validation loops (they hold no state).
	loss = nn.CrossEntropyLoss(reduction="none")
	dur_loss = nn.MSELoss(reduction="none")
	land_pos_loss = nn.MSELoss(reduction="none")

	n_folds = cf["n_folds"]
	kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
	fold_indx = 0
	#for scanpath generation
	sp_dnn_list = []
	sp_human_list = []
	for train_idx, test_idx in kf.split(split_list):
		loss_dict = {'val_loss':[], 'train_loss':[], 'test_ll':[], 'test_ll_SE':[], 'test_mse_dur':[], 'test_mse_dur_SE':[], 'test_mse_land_pos':[], 'test_mse_land_pos_SE':[], 'central_scasim_dnn':[], 'central_scasim_dnn_SE':[], 'central_scasim_human':[], 'central_scasim_human_SE':[], 'scasim_dnn':[], 'scasim_dnn_SE':[], 'scasim_human':[], 'scasim_human_SE':[]}
		list_train = [split_list[i] for i in train_idx]
		list_test = [split_list[i] for i in test_idx]

		# Single checkpoint path per fold, used for both saving and loading so
		# the two can never disagree.
		model_path = os.path.join(save_data_folder, f'BSC_Eyettention_Reader_{cf["subid_emb_size"]}_Fold{fold_indx}.pth')

		# create train validation split for training the models:
		# only the first inner fold is used
		kf_val = KFold(n_splits=n_folds, shuffle=True, random_state=0)
		train_index, val_index = next(kf_val.split(list_train))
		list_train_net = [list_train[i] for i in train_index]
		list_val_net = [list_train[i] for i in val_index]

		sn_list_train = list_train_net
		sn_list_val = list_val_net
		sn_list_test = list_test
		reader_list_train, reader_list_val, reader_list_test = reader_list, reader_list, reader_list

		#initialize tokenizer
		tokenizer = BertTokenizer.from_pretrained(cf['model_pretrained'])
		#Preparing batch data
		dataset_train = BSCdataset(word_info_df, eyemovement_df, cf, reader_list_train, sn_list_train, tokenizer)
		train_dataloaderr = DataLoader(dataset_train, batch_size = cf["batch_size"], shuffle = True, drop_last=True)

		dataset_val = BSCdataset(word_info_df, eyemovement_df, cf, reader_list_val, sn_list_val, tokenizer)
		val_dataloaderr = DataLoader(dataset_val, batch_size = cf["batch_size"], shuffle = False, drop_last=True)

		dataset_test = BSCdataset(word_info_df, eyemovement_df, cf, reader_list_test, sn_list_test, tokenizer)
		test_dataloaderr = DataLoader(dataset_test, batch_size = cf["batch_size"], shuffle = False, drop_last=False)

		#z-score normalization for gaze features (statistics come from the training split only)
		fix_dur_mean, fix_dur_std = calculate_mean_std(dataloader=train_dataloaderr, feat_key="sp_fix_dur", padding_value=0, scale=1000)
		landing_pos_mean, landing_pos_std = calculate_mean_std(dataloader=train_dataloaderr, feat_key="sp_landing_pos", padding_value=0)
		sn_word_len_mean, sn_word_len_std = calculate_mean_std(dataloader=train_dataloaderr, feat_key="sn_word_len")

		# load model
		dnn = Eyettention_readerID(cf)

		#training
		episode = 0
		optimizer = Adam(dnn.parameters(), lr=cf["lr"])
		dnn.train()
		dnn.to(device)
		# running windows over the most recent 100 batch losses
		av_score = deque(maxlen=100)
		av_location_score = deque(maxlen=100)
		av_duration_score = deque(maxlen=100)
		av_land_pos_score = deque(maxlen=100)
		old_score = 1e10
		save_ep_couter = 0
		print('Start training')
		print("fold_indx", fold_indx)
		for episode_i in range(episode, cf["n_epochs"]+1):
			dnn.train()
			print('episode:', episode_i)
			counter = 0
			for batchh in train_dataloaderr:
				counter += 1
				sn_ids = batchh["sn_ids"].to(device)
				sn_input_ids = batchh["sn_input_ids"].to(device)
				sn_attention_mask = batchh["sn_attention_mask"].to(device)
				sp_input_ids = batchh["sp_input_ids"].to(device)
				sp_attention_mask = batchh["sp_attention_mask"].to(device)
				sp_pos = batchh["sp_pos"].to(device)
				sp_landing_pos = batchh["sp_landing_pos"].to(device) # [256, 40]
				sp_fix_dur = (batchh["sp_fix_dur"]/1000).to(device) # [256, 40]
				sn_word_len = batchh["sn_word_len"].to(device)
				sub_id = batchh["sub_id"].to(device)

				# normalize gaze features (z-score normalisation);
				# positions with a zero duration are treated as padding and kept at 0
				mask = ~torch.eq(sp_fix_dur, 0)
				sp_fix_dur = (sp_fix_dur-fix_dur_mean)/fix_dur_std * mask
				sp_fix_dur = torch.nan_to_num(sp_fix_dur) # [256, 40]
				sp_landing_pos = (sp_landing_pos - landing_pos_mean)/landing_pos_std * mask
				sp_landing_pos = torch.nan_to_num(sp_landing_pos)
				sn_word_len = (sn_word_len - sn_word_len_mean)/sn_word_len_std
				sn_word_len = torch.nan_to_num(sn_word_len)

				# zero old gradients
				optimizer.zero_grad()
				# predict output with DNN
				location_preds, duration_preds, landing_pos_preds, atten_weights = dnn(sn_emd=sn_input_ids,
											sn_mask=sn_attention_mask,
											sp_emd=sp_input_ids,
											sp_pos=sp_pos,
											word_ids_sn=None,
											word_ids_sp=None,
											sp_fix_dur=sp_fix_dur,
											sp_landing_pos=sp_landing_pos,
											sn_word_len = sn_word_len,
											sub_id = sub_id
											)#[batch, step, dec_o_dim]

				location_preds = location_preds.permute(0,2,1)              #[batch, dec_o_dim, step]

				#prepare label and mask
				# Compute loss for fixation locations
				pad_mask, label = load_label(sp_pos, cf, le, device)
				batch_location_error = torch.mean(torch.masked_select(loss(location_preds, label), ~pad_mask))

				# Compute loss for fixation durations
				duration_preds = duration_preds.squeeze(-1)  # Remove extra dimension (from [256, 39, 1] to [256, 39])
				# Trim the labels to the number of predicted steps instead of a hard-coded 39
				duration_labels = sp_fix_dur[:, :duration_preds.shape[1]]
				batch_duration_error = torch.mean(dur_loss(duration_preds, duration_labels))

				# Compute loss for landing position
				landing_pos_preds = landing_pos_preds.squeeze(-1)  # Remove extra dimension (from [256, 39, 1] to [256, 39])
				landing_pos_labels = sp_landing_pos[:, :landing_pos_preds.shape[1]]
				batch_land_pos_error = torch.mean(land_pos_loss(landing_pos_preds, landing_pos_labels))

				# Combined loss for location, duration and landing position
				batch_error = batch_location_error + batch_duration_error + batch_land_pos_error

				# backpropagate loss
				batch_error.backward()
				# clip gradients
				gradient_clipping(dnn, cf["max_grad_norm"])

				#learn
				optimizer.step()
				av_location_score.append(batch_location_error.to('cpu').detach().numpy())
				av_duration_score.append(batch_duration_error.to('cpu').detach().numpy())
				av_land_pos_score.append(batch_land_pos_error.to('cpu').detach().numpy())
				av_score.append(batch_error.to('cpu').detach().numpy())
				print('counter:',counter)
				print('\rSample {}\tLocation Loss: {:.10f}\tDuration Loss: {:.10f}\tLanding position Loss: {:.10f}'.format(
					counter, np.mean(av_location_score), np.mean(av_duration_score), np.mean(av_land_pos_score)), end=" ")
			loss_dict['train_loss'].append(np.mean(av_score))

			location_val_loss = []
			duration_val_loss = []
			land_pos_val_loss = []
			val_loss = []
			dnn.eval()
			with torch.no_grad():
				for batchh in val_dataloaderr:
					sn_ids_val = batchh["sn_ids"].to(device)
					sn_input_ids_val = batchh["sn_input_ids"].to(device)
					sn_attention_mask_val = batchh["sn_attention_mask"].to(device)
					sp_input_ids_val = batchh["sp_input_ids"].to(device)
					sp_attention_mask_val = batchh["sp_attention_mask"].to(device)
					sp_pos_val = batchh["sp_pos"].to(device)
					sp_landing_pos_val = batchh["sp_landing_pos"].to(device)
					sp_fix_dur_val = (batchh["sp_fix_dur"]/1000).to(device)
					sn_word_len_val = batchh["sn_word_len"].to(device)
					sub_id_val = batchh["sub_id"].to(device)

					#normalize gaze features
					mask = ~torch.eq(sp_fix_dur_val, 0)
					sp_fix_dur_val = (sp_fix_dur_val-fix_dur_mean)/fix_dur_std * mask
					sp_landing_pos_val = (sp_landing_pos_val - landing_pos_mean)/landing_pos_std * mask
					sp_fix_dur_val = torch.nan_to_num(sp_fix_dur_val)
					sp_landing_pos_val = torch.nan_to_num(sp_landing_pos_val)
					sn_word_len_val = (sn_word_len_val - sn_word_len_mean)/sn_word_len_std
					sn_word_len_val = torch.nan_to_num(sn_word_len_val)

					location_preds_val, duration_preds_val, landing_pos_preds_val, atten_weights_val = dnn(sn_emd=sn_input_ids_val,
														sn_mask=sn_attention_mask_val,
														sp_emd=sp_input_ids_val,
														sp_pos=sp_pos_val,
														word_ids_sn=None,
														word_ids_sp=None,
														sp_fix_dur=sp_fix_dur_val,
														sp_landing_pos=sp_landing_pos_val,
														sn_word_len = sn_word_len_val,
														sub_id = sub_id_val)#[batch, step, dec_o_dim]
					location_preds_val = location_preds_val.permute(0,2,1)              #[batch, dec_o_dim, step]

					# Compute location prediction error
					pad_mask_val, label_val = load_label(sp_pos_val, cf, le, device)
					location_error_val = torch.mean(torch.masked_select(loss(location_preds_val, label_val), ~pad_mask_val))
					location_val_loss.append(location_error_val.detach().to('cpu').numpy())

					# Compute duration prediction error
					duration_preds_val = duration_preds_val.squeeze(-1)
					duration_labels_val = sp_fix_dur_val[:, :duration_preds_val.shape[1]]
					duration_error_val = torch.mean(dur_loss(duration_preds_val, duration_labels_val))
					duration_val_loss.append(duration_error_val.detach().to('cpu').numpy())

					# Compute loss for landing position
					landing_pos_preds_val = landing_pos_preds_val.squeeze(-1)  # Remove extra dimension (from [256, 39, 1] to [256, 39])
					landing_pos_labels_val = sp_landing_pos_val[:, :landing_pos_preds_val.shape[1]]
					land_pos_error_val = torch.mean(land_pos_loss(landing_pos_preds_val, landing_pos_labels_val))
					land_pos_val_loss.append(land_pos_error_val.detach().to('cpu').numpy())

					combined_loss = location_error_val + duration_error_val + land_pos_error_val
					val_loss.append(combined_loss.detach().to('cpu').numpy())

			print('\nValidation loss for locations {} \n'.format(np.mean(location_val_loss)))
			print('\nValidation loss for duration {} \n'.format(np.mean(duration_val_loss)))
			print('\nValidation loss for landing position {} \n'.format(np.mean(land_pos_val_loss)))
			loss_dict['val_loss'].append(np.mean(val_loss))

			if np.mean(val_loss) < old_score:
				# save model if val loss is smallest
				torch.save(dnn.state_dict(), model_path)
				old_score = np.mean(val_loss)
				print('\nsaved model state dict\n')
				save_ep_couter = episode_i
			else:
				#early stopping
				if episode_i - save_ep_couter >= cf["earlystop_patience"]:
					break

		# NOTE: fold_indx is deliberately NOT advanced here. The evaluation
		# below must reload the checkpoint of the fold that was just trained;
		# the index is incremented once, at the very end of the fold.

		#evaluation
		dnn.eval()
		res_llh=[]
		res_mse_dur = []
		res_mse_land_pos = []
		res_central_scasim_human = []
		res_central_scasim_dnn = []
		res_scasim_human = []
		res_scasim_dnn = []
		dnn.load_state_dict(torch.load(model_path, map_location='cpu'))
		dnn.to(device)
		batch_indx = 0
		print("Evaluating for fold", fold_indx)
		with torch.no_grad():
			for batchh in test_dataloaderr:
				sn_ids_test = batchh["sn_ids"].to(device)
				sn_input_ids_test = batchh["sn_input_ids"].to(device)
				sn_attention_mask_test = batchh["sn_attention_mask"].to(device)
				sp_input_ids_test = batchh["sp_input_ids"].to(device)
				sp_attention_mask_test = batchh["sp_attention_mask"].to(device)
				sp_pos_test = batchh["sp_pos"].to(device) # 28: '<Sep>', 29: '<'Pad'>'
				sp_landing_pos_test = batchh["sp_landing_pos"].to(device)
				sp_fix_dur_test = (batchh["sp_fix_dur"]/1000).to(device)
				sn_word_len_test = batchh["sn_word_len"].to(device)
				sub_id_test = batchh["sub_id"].to(device)

				#normalize gaze features
				mask = ~torch.eq(sp_fix_dur_test, 0)
				sp_fix_dur_test = (sp_fix_dur_test-fix_dur_mean)/fix_dur_std * mask
				sp_landing_pos_test = (sp_landing_pos_test - landing_pos_mean)/landing_pos_std * mask
				sp_fix_dur_test = torch.nan_to_num(sp_fix_dur_test)
				sp_landing_pos_test = torch.nan_to_num(sp_landing_pos_test)
				sn_word_len_test = (sn_word_len_test - sn_word_len_mean)/sn_word_len_std
				sn_word_len_test = torch.nan_to_num(sn_word_len_test)

				location_preds_test, duration_preds_test, landing_pos_preds_test, atten_weights_test = dnn(sn_emd=sn_input_ids_test,
														sn_mask=sn_attention_mask_test,
														sp_emd=sp_input_ids_test,
														sp_pos=sp_pos_test,
														word_ids_sn=None,
														word_ids_sp=None,
														sp_fix_dur=sp_fix_dur_test,
														sp_landing_pos=sp_landing_pos_test,
														sn_word_len = sn_word_len_test,
														sub_id = sub_id_test
														) #[batch, step, dec_o_dim]

				########## Evaluate location predictions ##########
				m = nn.Softmax(dim=2)
				location_preds_test = m(location_preds_test).detach().to('cpu').numpy()

				#prepare label and mask
				pad_mask_test, label_test = load_label(sp_pos_test, cf, le, 'cpu')
				#compute log likelihood for the batch samples
				res_batch = eval_log_llh(location_preds_test, label_test, pad_mask_test)
				res_llh.append(np.array(res_batch))

				print("######### Eyettention Reader 2.0 model evaluation ##########")
				duration_preds_test = duration_preds_test.squeeze(-1)
				# Trim labels and mask to the number of predicted steps
				n_pred_steps = duration_preds_test.shape[1]
				duration_labels_test = sp_fix_dur_test[:, :n_pred_steps]
				test_mask = mask[:, :n_pred_steps]
				mse_dur = eval_mse(duration_preds_test, duration_labels_test, test_mask)
				print("MSE for durations", np.mean(mse_dur))
				res_mse_dur.append(np.array(mse_dur))

				landing_pos_preds_test = landing_pos_preds_test.squeeze(-1)
				landing_pos_labels_test = sp_landing_pos_test[:, :n_pred_steps]
				mse_landing_pos = eval_mse(landing_pos_preds_test, landing_pos_labels_test, test_mask)
				print("MSE for landing positions", np.mean(mse_landing_pos))
				res_mse_land_pos.append(np.array(mse_landing_pos))

				if scanpath_gen_flag:
					# sentence length without the two special tokens
					sn_len = (torch.sum(sn_attention_mask_test, axis=1) - 2).detach().to('cpu').numpy()
					# compute the scan path generated from the model when the first CLS token is given
					sp_dnn, _, dur_dnn, land_pos_dnn = dnn.scanpath_generation(sn_emd=sn_input_ids_test,
														 sn_mask=sn_attention_mask_test,
														 word_ids_sn=None,
														 sn_word_len = sn_word_len_test,
														 le=le,
														 sp_fix_dur=sp_fix_dur_test,
														 sp_landing_pos = sp_landing_pos_test,
														 sub_id = sub_id_test,
														 max_pred_len=cf['max_pred_len'])

					sp_dnn, sp_human = prepare_scanpath(sp_dnn.detach().to('cpu').numpy(),
														dur_dnn.detach().to('cpu').numpy(),
														land_pos_dnn.detach().to('cpu').numpy(),
														sn_len, sp_pos_test,
														sp_fix_dur_test, sp_landing_pos_test, cf, sn_ids_test,
														fix_dur_mean, fix_dur_std, landing_pos_mean, landing_pos_std)

					sp_dnn_list.extend(sp_dnn)
					sp_human_list.extend(sp_human)

					sp_dnn = convert_sp_to_lists(sp_dnn)
					sp_human = convert_sp_to_lists(sp_human)
					sp_human = modify_landing_pos(sp_human.copy())
					sp_dnn = modify_landing_pos(sp_dnn.copy())
					random_sp = sample_random_sp("BSC", sp_human)
					random_sp = convert_sp_to_lists(random_sp)
					random_sp = modify_landing_pos(random_sp.copy())

					scasim_scores_dnn = compute_scasim(sp_dnn, sp_human)
					res_scasim_dnn.append(scasim_scores_dnn)
					print("Mean scasim dnn", np.mean(scasim_scores_dnn))
					scasim_scores_human = compute_scasim(sp_human, random_sp)
					res_scasim_human.append(scasim_scores_human)
					print("Mean scasim human", np.mean(scasim_scores_human))

					central_scasim_scores_dnn = compute_central_scasim("BSC_most_central_sp.txt", sp_dnn)
					central_scasim_scores_human = compute_central_scasim("BSC_most_central_sp.txt", sp_human)
					res_central_scasim_dnn.append(np.array(central_scasim_scores_dnn))
					res_central_scasim_human.append(np.array(central_scasim_scores_human))
					print("Mean central scasim dnn", np.mean(central_scasim_scores_dnn))
					print("Mean central scasim human", np.mean(central_scasim_scores_human))

				batch_indx +=1

		res_llh = np.concatenate(res_llh).ravel()
		loss_dict['test_ll'].append(res_llh)
		res_mse_dur = np.concatenate(res_mse_dur).ravel()
		loss_dict['test_mse_dur'].append(res_mse_dur)
		res_mse_land_pos = np.concatenate(res_mse_land_pos).ravel()
		loss_dict['test_mse_land_pos'].append(res_mse_land_pos)

		# store the normalisation statistics alongside the metrics
		loss_dict['fix_dur_mean'] = fix_dur_mean
		loss_dict['fix_dur_std'] = fix_dur_std
		loss_dict['landing_pos_mean'] = landing_pos_mean
		loss_dict['landing_pos_std'] = landing_pos_std
		loss_dict['sn_word_len_mean'] = sn_word_len_mean
		loss_dict['sn_word_len_std'] = sn_word_len_std

		print('Test likelihood is {}'.format(np.mean(res_llh)))
		loss_dict['test_ll_SE'].append(_standard_error(res_llh))
		print("Standard error for NLL", _standard_error(res_llh))

		print('Test MSE for durations is {}'.format(np.mean(res_mse_dur)))
		loss_dict['test_mse_dur_SE'].append(_standard_error(res_mse_dur))
		print("Standard error for MSE dur", _standard_error(res_mse_dur))

		print('Test MSE for landing positions is {}'.format(np.mean(res_mse_land_pos)))
		loss_dict['test_mse_land_pos_SE'].append(_standard_error(res_mse_land_pos))
		print("Standard error for MSE land pos", _standard_error(res_mse_land_pos))

		# Scasim results only exist when scanpaths were generated; without
		# this guard np.concatenate would raise on the empty result lists.
		if scanpath_gen_flag:
			res_central_scasim_dnn = np.concatenate(res_central_scasim_dnn).ravel()
			loss_dict['central_scasim_dnn'].append(res_central_scasim_dnn)
			res_central_scasim_human = np.concatenate(res_central_scasim_human).ravel()
			loss_dict['central_scasim_human'].append(res_central_scasim_human)
			res_scasim_dnn = np.concatenate(res_scasim_dnn).ravel()
			loss_dict['scasim_dnn'].append(res_scasim_dnn)
			res_scasim_human = np.concatenate(res_scasim_human).ravel()
			loss_dict['scasim_human'].append(res_scasim_human)

			print("Central Scasim dnn", np.mean(loss_dict['central_scasim_dnn']))
			loss_dict['central_scasim_dnn_SE'].append(_standard_error(res_central_scasim_dnn))
			print("Standard error for Central scasim DNN", _standard_error(res_central_scasim_dnn))

			print("Central Scasim human", np.mean(loss_dict['central_scasim_human']))
			loss_dict['central_scasim_human_SE'].append(_standard_error(res_central_scasim_human))
			print("Standard error for Central scasim human", _standard_error(res_central_scasim_human))

			print("Scasim dnn", np.mean(loss_dict['scasim_dnn']))
			loss_dict['scasim_dnn_SE'].append(_standard_error(res_scasim_dnn))
			print("Standard error for scasim dnn", _standard_error(res_scasim_dnn))

			print("Scasim human", np.mean(loss_dict['scasim_human']))
			loss_dict['scasim_human_SE'].append(_standard_error(res_scasim_human))
			print("Standard error for scasim human", _standard_error(res_scasim_human))

		#save results
		with open('{}/res_BSC_eyettention_reader_Fold{}.pickle'.format(save_data_folder, fold_indx), 'wb') as handle:
			pickle.dump(loss_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
		fold_indx += 1


Start evaluating on new sentences.




keeping Bert with pre-trained weights
Evaluating for fold 0
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.020104618552977627
MSE for landing positions 0.014165729791784543


  item = torch.tensor(item)


Mean scasim dnn 1345.546875
Mean scasim human 1958.22265625
Mean central scasim dnn 985.35546875
Mean central scasim human 1232.12109375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.022530937165811338
MSE for landing positions 0.014331071230344605


  item = torch.tensor(item)


Mean scasim dnn 1278.9921875
Mean scasim human 1601.046875
Mean central scasim dnn 958.51953125
Mean central scasim human 1223.3046875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02343079150136873
MSE for landing positions 0.015731161121038895


  item = torch.tensor(item)


Mean scasim dnn 1590.37109375
Mean scasim human 1849.6796875
Mean central scasim dnn 1328.69140625
Mean central scasim human 1529.265625
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.027691648410836933
MSE for landing positions 0.016114772654646004


  item = torch.tensor(item)


Mean scasim dnn 1496.53125
Mean scasim human 1804.27734375
Mean central scasim dnn 1109.80859375
Mean central scasim human 1427.4375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.03810682073253702
MSE for landing positions 0.017794159804452647


  item = torch.tensor(item)


Mean scasim dnn 1651.84375
Mean scasim human 2024.109375
Mean central scasim dnn 1244.7109375
Mean central scasim human 1555.453125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.028498677995230537
MSE for landing positions 0.017725731196151173


  item = torch.tensor(item)


Mean scasim dnn 1587.30078125
Mean scasim human 2330.9921875
Mean central scasim dnn 1148.3203125
Mean central scasim human 1515.46875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02699753182016082
MSE for landing positions 0.015924101622320365


  item = torch.tensor(item)


Mean scasim dnn 1571.1317829457364
Mean scasim human 2106.9922480620153
Mean central scasim dnn 998.5891472868217
Mean central scasim human 1486.9767441860465
Test likelihood is -1.7063532229504164
Standard error for NLL 0.01668078753773475
Test MSE for durations is 0.026748189886272592
Standard error for MSE dur 0.0016587339361378115
Test MSE for landing positions is 0.01597299778599057
Standard error for MSE land pos 0.00031287554719322194
Central Scasim dnn 1119.1123123123123
Standard error for Central scasim DNN 11.15437896429574
Central Scasim human 1419.5081081081082
Standard error for Central scasim human 15.870360984482755
Scasim dnn 1497.9135135135134
Standard error for scasim dnn 16.59213294453015
Scasim human 1941.9183183183184
Standard error for scasim human 18.59812566758492




keeping Bert with pre-trained weights
Evaluating for fold 1
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02403234313271696
MSE for landing positions 0.01497217992550759


  item = torch.tensor(item)


Mean scasim dnn 1333.11328125
Mean scasim human 2131.95703125
Mean central scasim dnn 1019.87890625
Mean central scasim human 1258.2109375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.029464269882737426
MSE for landing positions 0.017094335168167163


  item = torch.tensor(item)


Mean scasim dnn 1572.78515625
Mean scasim human 1881.42578125
Mean central scasim dnn 1213.7578125
Mean central scasim human 1504.40234375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02928394004902657
MSE for landing positions 0.018433299781463575


  item = torch.tensor(item)


Mean scasim dnn 1748.421875
Mean scasim human 2171.96875
Mean central scasim dnn 1234.58984375
Mean central scasim human 1668.89453125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.023247076700954494
MSE for landing positions 0.016586749208272522


  item = torch.tensor(item)


Mean scasim dnn 1481.73046875
Mean scasim human 1984.28515625
Mean central scasim dnn 1149.61328125
Mean central scasim human 1400.32421875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025800976158734557
MSE for landing positions 0.0169989614860242


  item = torch.tensor(item)


Mean scasim dnn 1646.82421875
Mean scasim human 2034.140625
Mean central scasim dnn 1258.77734375
Mean central scasim human 1573.20703125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.03426080622853078
MSE for landing positions 0.019135805991709276


  item = torch.tensor(item)


Mean scasim dnn 1826.82421875
Mean scasim human 2348.76953125
Mean central scasim dnn 1301.01171875
Mean central scasim human 1710.8203125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02511788024321983
MSE for landing positions 0.017311580996274164


  item = torch.tensor(item)


Mean scasim dnn 1618.9052631578948
Mean scasim human 2076.684210526316
Mean central scasim dnn 1125.9789473684211
Mean central scasim human 1601.6105263157895
Test likelihood is -1.6739646501951082
Standard error for NLL 0.01540914844591275
Test MSE for durations is 0.027532242878109923
Standard error for MSE dur 0.0009107881491094707
Test MSE for landing positions is 0.017209847378478842
Standard error for MSE land pos 0.00030188373714650293
Central Scasim dnn 1192.1771919068055
Standard error for Central scasim DNN 10.65965850346854
Central Scasim human 1524.1036174126302
Standard error for Central scasim human 17.039440717856284
Scasim dnn 1602.6235438381361
Standard error for scasim dnn 17.444006162214325
Scasim human 2091.193746167995
Standard error for scasim human 20.104282372591037




keeping Bert with pre-trained weights
Evaluating for fold 2
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025730273594263053
MSE for landing positions 0.016699319310646388


  item = torch.tensor(item)


Mean scasim dnn 1524.95703125
Mean scasim human 2119.8046875
Mean central scasim dnn 1133.19140625
Mean central scasim human 1419.109375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.03544972696499826
MSE for landing positions 0.017912964245397234


  item = torch.tensor(item)


Mean scasim dnn 1533.65234375
Mean scasim human 2118.5546875
Mean central scasim dnn 1104.0703125
Mean central scasim human 1503.671875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025673802474557306
MSE for landing positions 0.01679855188467627


  item = torch.tensor(item)


Mean scasim dnn 1613.53125
Mean scasim human 2262.96484375
Mean central scasim dnn 1121.18359375
Mean central scasim human 1532.1640625
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.027251346106822893
MSE for landing positions 0.017354263374727452


  item = torch.tensor(item)


Mean scasim dnn 1589.25390625
Mean scasim human 1938.1171875
Mean central scasim dnn 1234.8203125
Mean central scasim human 1504.34765625
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02797846850626229
MSE for landing positions 0.01752608042488646


  item = torch.tensor(item)


Mean scasim dnn 1607.94140625
Mean scasim human 2551.515625
Mean central scasim dnn 1186.0
Mean central scasim human 1531.99609375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02959459999419778
MSE for landing positions 0.019071004137458658


  item = torch.tensor(item)


Mean scasim dnn 1749.19921875
Mean scasim human 2405.03515625
Mean central scasim dnn 1224.5703125
Mean central scasim human 1680.015625
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.024696816482160867
MSE for landing positions 0.016107698045341314


  item = torch.tensor(item)


Mean scasim dnn 1637.2477876106195
Mean scasim human 1908.070796460177
Mean central scasim dnn 1261.6194690265486
Mean central scasim human 1486.6017699115043
Test likelihood is -1.6659939160000978
Standard error for NLL 0.0146455686428165
Test MSE for durations is 0.028344671909403384
Standard error for MSE dur 0.001414337033659194
Test MSE for landing positions is 0.017460817964729192
Standard error for MSE land pos 0.0003895428018365338
Central Scasim dnn 1173.7689508793208
Standard error for Central scasim DNN 11.024375247623341
Central Scasim human 1525.6761673741662
Standard error for Central scasim human 16.59091610743915
Scasim dnn 1605.4299575500304
Standard error for scasim dnn 17.91386745705258
Scasim human 2210.422073984233
Standard error for scasim human 20.60278866120845




keeping Bert with pre-trained weights
Evaluating for fold 3
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025644212037150282
MSE for landing positions 0.015716443847168193


  item = torch.tensor(item)


Mean scasim dnn 1466.37109375
Mean scasim human 1920.0078125
Mean central scasim dnn 1084.0703125
Mean central scasim human 1385.4296875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025572775271939463
MSE for landing positions 0.020074330001989438


  item = torch.tensor(item)


Mean scasim dnn 1556.17578125
Mean scasim human 2150.453125
Mean central scasim dnn 1153.57421875
Mean central scasim human 1447.48828125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.01879255891753928
MSE for landing positions 0.013868569046735502


  item = torch.tensor(item)


Mean scasim dnn 1282.8125
Mean scasim human 1605.2265625
Mean central scasim dnn 944.5546875
Mean central scasim human 1212.3046875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.03188676596801088
MSE for landing positions 0.019465651340397017


  item = torch.tensor(item)


Mean scasim dnn 1809.234375
Mean scasim human 2252.3671875
Mean central scasim dnn 1423.390625
Mean central scasim human 1737.0859375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025853953962723608
MSE for landing positions 0.01773369721013296


  item = torch.tensor(item)


Mean scasim dnn 1683.59765625
Mean scasim human 1987.1171875
Mean central scasim dnn 1300.44921875
Mean central scasim human 1509.62109375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.027906900439802484
MSE for landing positions 0.01637170264575616


  item = torch.tensor(item)


Mean scasim dnn 1642.37109375
Mean scasim human 1843.7734375
Mean central scasim dnn 1275.25
Mean central scasim human 1541.3671875
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02278064302173538
MSE for landing positions 0.016996121102925008


  item = torch.tensor(item)


Mean scasim dnn 1384.6734693877552
Mean scasim human 1700.591836734694
Mean central scasim dnn 1016.1428571428571
Mean central scasim human 1333.0
Test likelihood is -1.5992242306377342
Standard error for NLL 0.014576616192985467
Test MSE for durations is 0.02575320542533939
Standard error for MSE dur 0.000890126171575056
Test MSE for landing positions is 0.017192534122205963
Standard error for MSE land pos 0.00033525583182291417
Central Scasim dnn 1186.0416156670747
Standard error for Central scasim DNN 12.283164260057383
Central Scasim human 1463.8665850673194
Standard error for Central scasim human 16.186334167544775
Scasim dnn 1562.1064871481028
Standard error for scasim dnn 17.879959152947563
Scasim human 1944.2766217870258
Standard error for scasim human 19.14795062721589




keeping Bert with pre-trained weights
Evaluating for fold 4
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.021821808593585956
MSE for landing positions 0.01480568557440165


  item = torch.tensor(item)


Mean scasim dnn 1419.80078125
Mean scasim human 1942.66015625
Mean central scasim dnn 1081.12890625
Mean central scasim human 1402.1328125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.024219427459684084
MSE for landing positions 0.014660380513305427


  item = torch.tensor(item)


Mean scasim dnn 1434.15234375
Mean scasim human 1664.2109375
Mean central scasim dnn 1053.109375
Mean central scasim human 1324.73828125
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02776871210517129
MSE for landing positions 0.016509522770320473


  item = torch.tensor(item)


Mean scasim dnn 1543.578125
Mean scasim human 1957.50390625
Mean central scasim dnn 1200.55078125
Mean central scasim human 1507.52734375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.02514451653087235
MSE for landing positions 0.016753797471665166


  item = torch.tensor(item)


Mean scasim dnn 1647.625
Mean scasim human 2263.51953125
Mean central scasim dnn 1283.92578125
Mean central scasim human 1566.34765625
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.026133705197480595
MSE for landing positions 0.017363432191359607


  item = torch.tensor(item)


Mean scasim dnn 1583.36328125
Mean scasim human 2043.3125
Mean central scasim dnn 1099.59765625
Mean central scasim human 1513.3515625
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.029502922757501437
MSE for landing positions 0.017443085308968875


  item = torch.tensor(item)


Mean scasim dnn 1599.95703125
Mean scasim human 2396.36328125
Mean central scasim dnn 1280.4609375
Mean central scasim human 1544.7734375
######### Eyettention Reader 2.0 model evaluation ##########
MSE for durations 0.025914144987077677
MSE for landing positions 0.01729224621006333


  item = torch.tensor(item)


Mean scasim dnn 1593.8155339805826
Mean scasim human 1822.2815533980583
Mean central scasim dnn 1223.2621359223301
Mean central scasim human 1584.2621359223301
Test likelihood is -1.663116664937798
Standard error for NLL 0.014766350952615736
Test MSE for durations is 0.025774543410987616
Standard error for MSE dur 0.0007489008936140284
Test MSE for landing positions is 0.016321106003735174
Standard error for MSE land pos 0.0002996371923271074
Central Scasim dnn 1170.031726662599
Standard error for Central scasim DNN 10.564486795784891
Central Scasim human 1483.2519829164125
Standard error for Central scasim human 15.589180798345229
Scasim dnn 1541.5820622330689
Standard error for scasim dnn 16.64419453118263
Scasim human 2030.6241610738255
Standard error for scasim human 19.491783643051676
