In [12]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, BatchNormalization, Input, Activation, Dropout # type: ignore
import os
from sklearn.model_selection import train_test_split
import math
import read_data

In [13]:
def conv_block(filters, strides):
    return Conv2D(filters=filters, kernel_size=3, strides=strides, activation='relu', padding='valid')

In [14]:
def eye_conv_blocks(inputs) :
    conv1 = conv_block(filters=64, strides=1)(inputs)
    conv2 = conv_block(filters=64, strides=2)(conv1)
    conv3 = conv_block(filters=128, strides=1)(conv2)
    conv4 = conv_block(filters=128, strides=2)(conv3)
    conv5 = conv_block(filters=256, strides=1)(conv4)
    conv6 = conv_block(filters=256, strides=2)(conv5)
    return conv6

In [15]:
def far_net() :
    
    # First Stream
    left_eye_input = Input(shape=(36,60,1))
    first_stream = eye_conv_blocks(left_eye_input)
    left_eye_features = Flatten()(first_stream)
    left_eye_features = Dense(500, activation='relu')(left_eye_features)
    
    # Second Stream
    right_eye_input = Input(shape=(36,60,1))
    second_stream = eye_conv_blocks(right_eye_input)
    right_eye_features = Flatten()(second_stream)
    right_eye_features = Dense(500, activation='relu')(right_eye_features)

    # Third Stream
    face_input = Input(shape=(224,224,3))
    face_features = Conv2D(96, kernel_size=(11,11), strides=(4,4))(face_input)
    face_features = BatchNormalization()(face_features)
    face_features = Activation("relu")(face_features)
    face_features = MaxPooling2D(pool_size=(3,3), strides=(2,2))(face_features)
    
    face_features = Conv2D(256, kernel_size=(5,5), padding='same')(face_features)
    face_features = BatchNormalization()(face_features)
    face_features = Activation("relu")(face_features)
    face_features = MaxPooling2D(pool_size=(3,3), strides=(2,2))(face_features)
    
    face_features = Conv2D(384, kernel_size=(3,3), padding='same')(face_features)
    face_features = BatchNormalization()(face_features)
    face_features = Activation("relu")(face_features)
    face_features = Conv2D(384, kernel_size=(3,3), padding='same')(face_features)
    face_features = BatchNormalization()(face_features)
    face_features = Activation("relu")(face_features)
    face_features = Conv2D(256, kernel_size=(3,3), padding='same')(face_features)
    face_features = BatchNormalization()(face_features)
    face_features = Activation("relu")(face_features)
    face_features = MaxPooling2D(pool_size=(3,3), strides=(2,2))(face_features)
    
    face_features = Flatten()(face_features)
    face_features = Dense(4096, activation='relu')(face_features)
    face_features = Dropout(0.5)(face_features)
    face_features = Dense(4096, activation='relu')(face_features)
    face_features = Dropout(0.5)(face_features)
    face_features = Dense(500, activation='relu')(face_features)
    
    concatenated_features = Concatenate()([left_eye_features,right_eye_features, face_features])
    outputs = Dense(6)(concatenated_features)
    
    model = keras.Model(inputs=[left_eye_input,right_eye_input,face_input], outputs=outputs)
    
    return model

In [16]:
def e_net():
    
    #First Stream
    left_eye_input = Input(shape=(36,60,1))
    first_stream = eye_conv_blocks(left_eye_input)
    left_eye_features = Flatten()(first_stream)
    left_eye_features = Dense(1000, activation='relu')(left_eye_features)
    left_eye_features = Dense(500, activation='relu')(left_eye_features)
    
    #Second Stream
    right_eye_input = Input(shape=(36,60,1))
    second_stream = eye_conv_blocks(right_eye_input)
    right_eye_features = Flatten()(second_stream)
    right_eye_features = Dense(1000, activation='relu')(right_eye_features)
    right_eye_features = Dense(500, activation='relu')(right_eye_features)
    
    concatenated_features = Concatenate()([left_eye_features,right_eye_features])
    output = Dense(2, activation='softmax')(concatenated_features)
    
    model = keras.Model(inputs=[left_eye_input, right_eye_input], outputs=output)
    
    return model


In [17]:
def split_into_batches(data, batch_size):
    batches = []
    for i in range(0, len(data), batch_size):
        batch = data[i:i+batch_size]
        batches.append(zip(*batch))
    return batches

In [18]:
def angular_err(v1, v2):
	v1xv2 = tf.reduce_sum(v1*v2,1)
	v1_len = tf.cast(tf.sqrt(tf.reduce_sum(tf.square(v1), 1)),dtype=tf.float32)
	v2_len = tf.cast(tf.sqrt(tf.reduce_sum(tf.square(v2), 1)),dtype=tf.float32)
	
	val = tf.minimum( v1xv2/((v1_len* v2_len ) + 1e-10), 0.999999)
 
	degree = tf.acos(val)
	return degree * 180/ math.pi

In [19]:
def train(ds_train, ds_test, batch_size=100, epochs=1, original=True) :
    
    ds_train = list(zip(ds_train[0],ds_train[1],ds_train[2],ds_train[3],))
    
    far_net_model = far_net()
    e_net_model = e_net()
    
    far_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    e_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    
    angular_errors = []
    
    far_losses = []
    e_losses = []
    
    for epoch in range(epochs) :
        print(f"Epoch {epoch} -- START")
        
        for l_eyes, r_eyes, faces, labels in split_into_batches(ds_train,batch_size) :
            
            l_eyes, r_eyes, faces, labels = np.array(l_eyes), np.array(r_eyes), np.array(faces), np.array(labels)
            
            with tf.GradientTape() as far_tape, tf.GradientTape() as e_tape :
                
                gaze_preds = far_net_model([l_eyes, r_eyes, faces])
                probs = e_net_model([l_eyes, r_eyes])
                
                # print(f"Gaze  --> {gaze_preds[:,:3]}")
                # print(f"Label --> {labels[:,:3]}")
                
                # print(f"Gaze  --> {gaze_preds[:,3:]}")
                # print(f"Label --> {labels[:,3:]}")
                
                left_err = angular_err(gaze_preds[:,:3],labels[:,:3])
                right_err = angular_err(gaze_preds[:,3:],labels[:,3:])
                
                # print(f"Left Err --> {left_err}")
                # print(f"Right Err --> {right_err}")
                
                far_err = ((2 * left_err * right_err ) + 1e-10)  / ((left_err + right_err) + 1e-10)
                # print(f"FAR-err -> {far_err}")
                
                avg_err = (left_err + right_err) /2
                
                n = tf.cast(tf.less_equal(left_err, right_err), tf.float32)
                
                # print(f"N -> {n}")
                
                squared_distance = tf.reduce_sum(tf.square(left_err - right_err), axis=-1)
                e_loss = - (n * squared_distance * tf.math.log(probs[:,0]) + (1 - n) * squared_distance * tf.math.log(probs[:,1]))

                
                weight = (1 + (2 * n - 1) * probs[:,0] + (1 - 2 * n) * probs[:,1]) / 2
                
                # print(f"W --> {weight}")
                
                far_loss = weight * far_err + (1 - weight) * 0.1 * avg_err
                print(f"FAR loss --> {tf.reduce_mean(far_loss)}")
                print(f"E loss --> {tf.reduce_mean(e_loss)}")
                
            gradients1 = far_tape.gradient(far_loss, far_net_model.trainable_variables)
            gradients2 = e_tape.gradient(e_loss, e_net_model.trainable_variables)
            
            # print(f"GRAD --> {gradients1}")
            # print(f"GRAD --> {gradients2}")
            
            far_optimizer.apply_gradients(zip(gradients1, far_net_model.trainable_variables))
            e_optimizer.apply_gradients(zip(gradients2, e_net_model.trainable_variables))
            
            far_losses.append(far_loss)
            e_losses.append(e_loss)
            
        print(f"Epoch {epoch} -- END")   
        
    save_path = "models/original/" if original else "models/enhanced/"
    
    if not os.path.exists(save_path) :
        os.makedirs(save_path, exist_ok=True) 
        
    far_net_model.save(f"{save_path}far_net_model.keras")
    e_net_model.save(f"{save_path}e_net_model.keras")
    print("Model Saved")
    
    print("Validating Model ...")
    
    ds_test = list(zip(ds_test[0],ds_test[1],ds_test[2],ds_test[3],))
    
    angular_errors = []
    choose_acc = 0
    total = 0
    
    for l_eyes, r_eyes, faces, labels in split_into_batches(ds_test,batch_size) :
    
        l_eyes, r_eyes, faces, labels = np.array(l_eyes), np.array(r_eyes), np.array(faces), np.array(labels)
    
        gaze_preds = far_net_model([l_eyes, r_eyes, faces])
        reliability_preds = e_net_model([l_eyes, r_eyes])
        
        left_err = angular_err(gaze_preds[:,:3],labels[:,:3])
        right_err = angular_err(gaze_preds[:,3:],labels[:,3:])
        
        choose_preds = tf.cast(tf.less_equal(reliability_preds[:,0], reliability_preds[:,1]), tf.int32)
        choose_labels = tf.cast(tf.greater(left_err, right_err), tf.int32)
        
        # print(choose_preds)
        # print(choose_labels)
        
        for i in choose_preds :
            if i == 0 :
                angular_errors.append(left_err[i])
            else :
                angular_errors.append(right_err[i])
        
        
        for choose_pred, choose_label in zip(choose_preds,choose_labels):
            total += 1
            
            if choose_pred == choose_label :
                choose_acc += 1
            
    avg_error = tf.reduce_mean(angular_errors)      
    choose_acc = choose_acc / total * 100
        
    
    print(f"Choose Acc => {choose_acc}%")
    print(f"Angular Error => {avg_error}")
    
    return far_losses, e_losses

In [20]:
is_original = False

In [21]:
l_eye_images, r_eye_images, face_images, labels = read_data.load_dataset(original_dataset=is_original)

l_eye_train, l_eye_test, r_eye_train, r_eye_test, face_train, face_test, labels_train, labels_test = train_test_split(
    l_eye_images, r_eye_images, face_images, labels, test_size=0.2, random_state=42)

# Check the shapes of the train and test sets
print("Left eye train shape:", l_eye_train.shape)
print("Right eye train shape:", r_eye_train.shape)
print("Face train shape:", face_train.shape)
print("Labels train shape:", labels_train.shape)

print("Left eye test shape:", l_eye_test.shape)
print("Right eye test shape:", r_eye_test.shape)
print("Face test shape:", face_test.shape)
print("Labels test shape:", labels_test.shape)

Left eye train shape: (1200, 36, 60)
Right eye train shape: (1200, 36, 60)
Face train shape: (1200, 224, 224, 3)
Labels train shape: (1200, 6)
Left eye test shape: (300, 36, 60)
Right eye test shape: (300, 36, 60)
Face test shape: (300, 224, 224, 3)
Labels test shape: (300, 6)


In [22]:
far_loss, e_loss =  train(ds_train=[l_eye_train, r_eye_train, face_train, labels_train], ds_test=[l_eye_test, r_eye_test, face_test, labels_test], epochs=1, original=is_original)

Epoch 0 -- START
FAR loss --> 25.888978958129883
E loss --> 262665.71875
FAR loss --> 22.811786651611328
E loss --> 14134.0087890625
FAR loss --> 13.943877220153809
E loss --> 95599.8203125
FAR loss --> 9.841591835021973
E loss --> 108592.90625
FAR loss --> 9.609467506408691
E loss --> 72072.828125
FAR loss --> 9.345309257507324
E loss --> 60649.01171875
FAR loss --> 8.168403625488281
E loss --> 40528.83984375
FAR loss --> 8.380247116088867
E loss --> 28693.984375
FAR loss --> 8.174182891845703
E loss --> 18997.431640625
FAR loss --> 7.708372592926025
E loss --> 11444.2099609375
FAR loss --> 6.788550853729248
E loss --> 8837.9267578125
FAR loss --> 5.556084156036377
E loss --> 7105.06884765625
Epoch 0 -- END
Model Saved
Validating Model ...
Choose Acc => 46.33333333333333%
Angular Error => 11.96943187713623
