In [26]:
import glob
import pandas as pd
import numpy as np

import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Reshape, GlobalAveragePooling1D
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import np_utils
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

from keras.layers import LSTM, Dense, Conv1D, TimeDistributed, Flatten, Activation, Dropout, Bidirectional,concatenate
from keras.callbacks import History, TensorBoard, Callback
import keras.initializers as KI
from keras.layers import BatchNormalization
from sklearn.metrics import f1_score, precision_score, recall_score

from sklearn.preprocessing import StandardScaler
import scipy.stats as stats
import warnings
warnings.filterwarnings("ignore")

from tqdm.keras import TqdmCallback

In [2]:
#merge the gaze labelled data for each participant and document
gaze_path = "../../data/working/gaze_labelled/"
gaze_list = [pd.read_csv(data, index_col=0) for data in sorted(glob.glob(gaze_path + "/*/*"))]
gaze_data = pd.concat(gaze_list, ignore_index=True, sort=False)

In [3]:
tf.config.experimental.set_visible_devices([], 'GPU')

In [15]:
gaze_data.columns
# features = ['left_gaze_x', 'left_gaze_y',
#        'left_position_x', 'left_position_y', 'left_position_z', 'left_pupil',
#        'right_gaze_x', 'right_gaze_y', 'right_position_x', 'right_position_y',
#        'right_position_z', 'right_pupil','understand']

features = ['left_gaze_x', 'left_gaze_y', 'right_gaze_x', 'right_gaze_y','understand','interest']

participants = gaze_data.participant.unique()

In [16]:
gaze_data['interest'] = gaze_data['interest'].replace([1,2,3,4], [0,1,2,3])
gaze_data['understand'] = gaze_data['understand'].replace([1,2,3,4], [0,1,2,3])
#gaze_data['bin_interest'] = gaze_data['interest'].replace([0,1,2,3])

In [17]:
def normalize(dataset):
    mu = np.mean(dataset, axis=0)
    sigma = np.std(dataset, axis=0)
    return (dataset - mu)/sigma

def get_frames(df, frame_size, step_size, label_name):

    N_FEATURES = len(features) - 1

    frames = []
    labels = []
    for i in range(0, len(df) - frame_size, step_size):
        for column in df.columns:
            if column!=label_name:
                column = df[column].values[i: i + frame_size]
                frames.append([column])
        else:
            
        # Retrieve the most often used label in this segment
            label = stats.mode(df[label_name][i: i + frame_size])[0][0]
            labels.append(label)

    # Bring the segments into a better shape
    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)

    return frames, labels

In [18]:
samp_freq = 60
frame_size = samp_freq*30 
step_size = samp_freq*15 

def prepare_data(data, fold):
  
    df_val = data[data['participant'] == participants[fold]]
    df_train = data[data['participant'] != participants[fold]]

    # df_train.drop(['participant','document'], axis=1, inplace=True)
    # df_val.drop(['participant','document'], axis=1, inplace=True)

    df_train = df_train[features]
    df_val = df_val[features]
    
    for col in df_train.columns:
        
        if col != 'interest':        
            df_train[col] = normalize(df_train[col])
            df_val[col] = normalize(df_val[col])

    x_train, y_train = get_frames(df_train, frame_size, step_size, 'interest')
    num_time_periods, num_sensors = x_train.shape[1], x_train.shape[2]
    num_classes = 4
    print (x_train.shape, y_train.shape)

    input_shape = (num_time_periods * num_sensors)
    x_train = x_train.reshape(x_train.shape[0], input_shape)

    # x_train = x_train.astype("float32")
    # y_train = y_train.astype("float32")

    #y_train = np_utils.to_categorical(y_train, num_classes)
    print(y_train.shape)

    x_val, y_val = get_frames(df_val, frame_size, step_size, 'interest')

    num_time_periods1, num_sensors1 = x_val.shape[1], x_val.shape[2]

    input_shape1 = (num_time_periods1 * num_sensors1)
    x_val = x_val.reshape(x_val.shape[0], input_shape1)

    # x_val = x_val.astype("float32")
    # y_val = y_val.astype("float32")

    #y_val = np_utils.to_categorical(y_val, num_classes)

    return num_time_periods, num_sensors, x_train, x_val, y_train, y_val

In [19]:
# Hyper-parameters
BATCH_SIZE = 256
EPOCHS = 10

In [24]:
conf_matrix_list_of_arrays = []
scores = []
acc_per_fold = []
loss_per_fold = []
f1_per_fold = []
prec_per_fold = []
rec_per_fold = []

def build_model(num_time_periods, num_sensors, num_classes, input_shape):
  # 1D CNN neural network
    
    model = Sequential()
    model.add(Reshape((frame_size, num_sensors), input_shape=(input_shape,)))
    model.add(Conv1D(64, 5, activation='relu', input_shape=(frame_size, num_sensors)))
    model.add(Conv1D(32, 5, activation='relu'))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(32, 10, activation='relu'))

    model.add(Conv1D(15, 3, activation='relu'))
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    return model


def evaluate(num_time_periods, num_sensors,t_x, val_x, t_y, val_y, EPOCHS, BATCH_SIZE):
    
    input_shape = (num_time_periods * num_sensors)
    model = build_model(num_time_periods, num_sensors, 4, input_shape)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

    history = model.fit(t_x, t_y, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1, callbacks=[es], validation_split=0.3)
    scores = model.evaluate(val_x, val_y)
    print(f'Score : {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])
    
    
    y_pred = np.argmax(model.predict(val_x), axis=-1)
    f1_per_fold.append(f1_score(val_y, y_pred, average='weighted'))
    prec_per_fold.append(precision_score(val_y, y_pred, average='weighted'))
    rec_per_fold.append(recall_score(val_y, y_pred, average='weighted'))
    print('F1 score : ', f1_score(val_y, y_pred, average='weighted'))


    cm = confusion_matrix(val_y, y_pred, labels=[0,1,2,3])
    print('Confusion Matrix : ')
    print(cm)
    
    #conf_matrix_list_of_arrays.append(cm)
    return history, scores, y_pred, acc_per_fold, f1_per_fold

def plot_cmx(labels, predicteds, binary=False):
    cmx = confusion_matrix(labels, predicteds)
    cmx = cmx.astype('float') / cmx.sum(axis=1)[:, np.newaxis]
    if binary:
        class_names = ['not-interested', 'interested']
    else:
        class_names = ['1','2','3','4']
    plt.figure(figsize=(8, 6))
    plt.title("mean accuracy: %.2f" % accuracy_score(labels, predicteds))
    sns.heatmap(cmx, annot=True, fmt=".0%",center=1, xticklabels=class_names, yticklabels=class_names)

    plt.ylabel('Predicted')
    plt.xlabel('Actual')
    plt.show()


In [25]:
model_history = [] 
cvscores = []
predicted = []
labels = []

for i in range(len(participants)):
    
    pred = []
    label = []
    print("Training with participant "+ participants[i] +" left out")
    num_time_periods, num_sensors, t_x, val_x, t_y, val_y = prepare_data(gaze_data, i)
    results, scores, preds, acc, f1 = evaluate(num_time_periods, num_sensors,t_x, val_x, t_y, val_y, EPOCHS, BATCH_SIZE)
    model_history.append(results)
    cvscores.append(scores[1] * 100)
    predicted.extend(preds)
    labels.extend(val_y) 
    print("Accuracy with participant " + participants[i] + ' as test :'+str(scores[1]*100))
    print("======="*12, end="\n\n\n")

#predicted = np.argmax(predicted, axis=1)
plot_cmx(labels, predicted)
print ('Average accuracy with all the participants')


Training with participant p01 left out
(3076, 1800, 5) (3076,)
(3076,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score : loss of 1.300048589706421; accuracy of 53.140097856521606%
F1 score :  0.5310367327563499
Confusion Matrix : 
[[40  0  1  0]
 [43 51 52  0]
 [ 0  1 19  0]
 [ 0  0  0  0]]
Accuracy with participant p01 as test :53.140097856521606


Training with participant p02 left out
(2928, 1800, 5) (2928,)
(2928,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score : loss of 1.1506725549697876; accuracy of 62.078648805618286%
F1 score :  0.589583439478992
Confusion Matrix : 
[[103   0   1   0]
 [ 90  28   9   0]
 [ 29   6  90   0]
 [  0   0   0   0]]
Accuracy with participant p02 as test :62.078648805618286


Training with participant p05 left out
(3106, 1800, 5) (3106,)
(3106,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch

KeyboardInterrupt: 

In [None]:
print (np.mean(acc), np.mean(f1))