In [12]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, BatchNormalization, Input, Dropout
import os
import cv2 as cv
from sklearn.model_selection import train_test_split
import math

In [13]:
def conv_block(filters, strides):
    return Conv2D(filters=filters, kernel_size=3, strides=strides, activation='relu', padding='valid')

In [14]:
def eye_conv_blocks(inputs) :
    conv1 = conv_block(filters=64, strides=1)(inputs)
    conv2 = conv_block(filters=64, strides=2)(conv1)
    conv3 = conv_block(filters=128, strides=1)(conv2)
    conv4 = conv_block(filters=128, strides=2)(conv3)
    conv5 = conv_block(filters=256, strides=1)(conv4)
    conv6 = conv_block(filters=256, strides=2)(conv5)
    return conv6

In [15]:
def far_net() :
    left_eye_input = Input(shape=(36,60,1))
    first_stream = eye_conv_blocks(left_eye_input)
    left_eye_features = Flatten()(first_stream)
    left_eye_features = Dense(500, activation='relu')(left_eye_features)
    
    
    right_eye_input = Input(shape=(36,60,1))
    second_stream = eye_conv_blocks(right_eye_input)
    right_eye_features = Flatten()(second_stream)
    right_eye_features = Dense(500, activation='relu')(right_eye_features)

    face_input = Input(shape=(224,224,3))
    face_features = Conv2D(96, kernel_size=(11,11), strides=(4,4), activation='relu')(face_input)
    face_features = MaxPooling2D(pool_size=(3,3), strides=(2,2))(face_features)
    face_features = BatchNormalization()(face_features)
    
    face_features = Conv2D(256, kernel_size=(5,5), padding='same', activation='relu')(face_features)
    face_features = MaxPooling2D(pool_size=(3,3), strides=(2,2))(face_features)
    face_features = BatchNormalization()(face_features)
    
    face_features = Conv2D(384, kernel_size=(3,3), padding='same', activation='relu')(face_features)
    face_features = Conv2D(384, kernel_size=(3,3), padding='same', activation='relu')(face_features)
    face_features = Conv2D(256, kernel_size=(3,3), padding='same', activation='relu')(face_features)
    face_features = MaxPooling2D(pool_size=(3,3), strides=(2,2))(face_features)
    face_features = BatchNormalization()(face_features)
    
    # Replace local response normalization with batch normalization
    
    # Flatten layer
    face_features = Flatten()(face_features)
    
    # Fully connected layers
    face_features = Dense(4096, activation='relu')(face_features)
    face_features = Dense(4096, activation='relu')(face_features)
    
    # Change the output dimension of the final fully connected layer
    face_features = Dense(500, activation='relu')(face_features)
    
    # ----
    # third_stream =  Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation='relu')(face_input)
    # third_stream = BatchNormalization()(third_stream)
    # third_stream = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(third_stream)
    
    # third_stream = Conv2D(256, kernel_size=(5, 5), strides=(1, 1), activation='relu')(third_stream)
    # third_stream = BatchNormalization()(third_stream)
    # third_stream = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(third_stream)
    
    # third_stream = Conv2D(384, kernel_size=(3, 3), strides=(1, 1), activation='relu')(third_stream)
    # third_stream = Conv2D(384, kernel_size=(3, 3), strides=(1, 1), activation='relu')(third_stream)
    # third_stream = Conv2D(256, kernel_size=(3, 3), strides=(1, 1), activation='relu')(third_stream)
    # third_stream = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(third_stream)
    # face_features = Flatten()(third_stream)
    # face_features = Dense(4096, activation='relu')(face_features)
    # face_features = Dropout(0.5)(face_features)
    # face_features = Dense(4096, activation='relu')(face_features)
    # face_features = Dropout(0.5)(face_features)
    # face_features = Dense(500, activation='relu')(face_features)
    # ----
    
    concatenated_features = Concatenate()([left_eye_features,right_eye_features, face_features])
    outputs = Dense(6,activation=None)(concatenated_features)
    
    model = keras.Model(inputs=[left_eye_input,right_eye_input,face_input], outputs=outputs)
    
    return model

In [16]:
def e_net():
    left_eye_input = Input(shape=(36,60,1))
    first_stream = eye_conv_blocks(left_eye_input)
    left_eye_features = Flatten()(first_stream)
    left_eye_features = Dense(1000, activation='relu')(left_eye_features)
    left_eye_features = Dense(500, activation='relu')(left_eye_features)
    
    
    right_eye_input = Input(shape=(36,60,1))
    second_stream = eye_conv_blocks(right_eye_input)
    right_eye_features = Flatten()(second_stream)
    right_eye_features = Dense(1000, activation='relu')(right_eye_features)
    right_eye_features = Dense(500, activation='relu')(right_eye_features)
    
    concatenated_features = Concatenate()([left_eye_features,right_eye_features])
    output = Dense(2, activation='softmax')(concatenated_features)
    
    model = keras.Model(inputs=[left_eye_input, right_eye_input], outputs=output)
    
    return model


In [17]:
def batch_data(data, batch_size):
    for i in range(0, len(data), batch_size):
        yield zip(*data[i:i+batch_size])

In [18]:

# Function to load images and annotations from a subject directory
def load_subject_data(subject_dir):
    l_eye_imgs = []
    r_eye_imgs = []
    face_imgs = []
    labels = []
    
    days = [day_dir for day_dir in os.listdir(subject_dir) if day_dir.startswith("day")]
    
    for day in days:
        day_path = os.path.join(subject_dir, day)
        if os.path.isdir(day_path):
            
            annotation_path = os.path.join(day_path, 'data.txt') 
            
            # Load annotations from data.txt
            with open(annotation_path, 'r') as f:
                annotations = [line.strip().split() for line in f.readlines()]
                
            # Load images and annotations
            for ann in annotations:
                image_name = ann[0]  
                label = ann[1:]
                
                # print(label)
                
                l_eye_img = cv.imread(f"{day_path}/left_eye/{image_name}",cv.IMREAD_GRAYSCALE)
                
                r_eye_img = cv.imread(f"{day_path}/right_eye/{image_name}",cv.IMREAD_GRAYSCALE)
                
                face_img = cv.imread(f"{day_path}/face/{image_name}")
                
                l_eye_imgs.append(l_eye_img)
                r_eye_imgs.append(r_eye_img)
                face_imgs.append(face_img)
                labels.append(label)
                
    return np.array(l_eye_imgs),np.array(r_eye_imgs),np.array(face_imgs),np.array(labels) 

# Function to load the entire dataset
def load_dataset(original_dataset=True):
    path = "data_subset/original" if original_dataset else "data_subset/enhanced"
    
    l_eye_list = []
    r_eye_list = []
    face_list = []
    label_list = []
    
    for i in range(14,15):
        subject_path = os.path.join(path, f"p{i:02d}")
        if os.path.isdir(subject_path):
            l_eye_imgs, r_eye_imgs, face_imgs, labels = load_subject_data(subject_path)
            
            l_eye_list.extend(l_eye_imgs)
            r_eye_list.extend(r_eye_imgs)
            face_list.extend(face_imgs)
            label_list.extend(labels)
            
    return np.array(l_eye_list), np.array(r_eye_list), np.array(face_list), np.array(labels, dtype=float)

In [19]:

# Load the dataset
l_eye_images, r_eye_images, face_images, labels = load_dataset(original_dataset=False)


l_eye_train, l_eye_test, r_eye_train, r_eye_test, face_train, face_test, labels_train, labels_test = train_test_split(
    l_eye_images, r_eye_images, face_images, labels, test_size=0.2, random_state=42)

# Check the shapes of the train and test sets
print("Left eye train shape:", l_eye_train.shape)
print("Right eye train shape:", r_eye_train.shape)
print("Face train shape:", face_train.shape)
print("Labels train shape:", labels_train.shape)

print("Left eye test shape:", l_eye_test.shape)
print("Right eye test shape:", r_eye_test.shape)
print("Face test shape:", face_test.shape)
print("Labels test shape:", labels_test.shape)


Left eye train shape: (1200, 36, 60)
Right eye train shape: (1200, 36, 60)
Face train shape: (1200, 224, 224, 3)
Labels train shape: (1200, 6)
Left eye test shape: (300, 36, 60)
Right eye test shape: (300, 36, 60)
Face test shape: (300, 224, 224, 3)
Labels test shape: (300, 6)


In [20]:
def split_into_batches(data, batch_size):
    batches = []
    for i in range(0, len(data), batch_size):
        batch = data[i:i+batch_size]
        batches.append(zip(*batch))
    return batches

In [21]:
def angular_err(v1, v2):
	v1xv2 = tf.reduce_sum(v1*v2,1)
	v1_len = tf.sqrt(tf.reduce_sum(tf.square(v1), 1))
	v2_len = tf.cast(tf.sqrt(tf.reduce_sum(tf.square(v2), 1)),dtype=tf.float32)
 
	# print("===")
	# print(v1xv2)
	# print("===")
	# print(v1_len)
	# print("===")
	# print(v2_len)
	
	val = tf.minimum( v1xv2/((v1_len* v2_len ) + 1e-10), 0.999999)
 
	degree = tf.acos(val)
	return degree

In [22]:


def train(ds_train,batch_size=100,epochs=1) :
    
    far_net_model = far_net()
    e_net_model = e_net()
    
    far_optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)
    e_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    
    angular_errors = []
    
    for epoch in range(epochs) :
        print(f"Epoch {epoch} -- START")
        
        for l_eyes, r_eyes, faces, labels in split_into_batches(ds_train,batch_size) :
            
            l_eyes, r_eyes, faces, labels = np.array(l_eyes), np.array(r_eyes), np.array(faces), np.array(labels)
            
            with tf.GradientTape() as far_tape, tf.GradientTape() as e_tape :
                
                gaze_preds = far_net_model([l_eyes, r_eyes, faces])
                probs = e_net_model([l_eyes, r_eyes])
                
                print(f"Gaze  --> {gaze_preds[:,:3]}")
                print(f"Label --> {labels[:,:3]}")
                
                print(f"Gaze  --> {gaze_preds[:,3:]}")
                print(f"Label --> {labels[:,3:]}")
                
                left_err = angular_err(gaze_preds[:,:3],labels[:,:3])
                right_err = angular_err(gaze_preds[:,3:],labels[:,3:])
                
                print(f"Left Err --> {left_err}")
                print(f"Right Err --> {right_err}")
                
                far_err = ((2 * left_err * right_err ) + 1e-10)  / ((left_err + right_err) + 1e-10)
                # print(f"FAR-err -> {far_err}")
                
                avg_err = (left_err + right_err) /2
                
                n = tf.cast(tf.less_equal(left_err, right_err), tf.float32)
                
                # print(f"N -> {n}")
                
                squared_distance = tf.reduce_sum(tf.square(left_err - right_err), axis=-1)
                e_loss = - (n * squared_distance * tf.math.log(probs[:,0]) + (1 - n) * squared_distance * tf.math.log(probs[:,1]))

                
                weight = (1 + (2 * n - 1) * probs[:,0] + (1 - 2 * n) * probs[:,1]) / 2
                
                # print(f"W --> {weight}")
                
                far_loss = weight * far_err + (1 - weight) * 0.1 * avg_err
                print(f"FAR loss --> {tf.reduce_mean(far_loss)}")
                print(f"E-loss -> {tf.reduce_mean(e_loss)}")
                
            gradients1 = far_tape.gradient(far_loss, far_net_model.trainable_variables)
            gradients2 = e_tape.gradient(e_loss, e_net_model.trainable_variables)
            
            print(f"GRAD --> {gradients1}")
            print(f"GRAD --> {gradients2}")
            
            far_optimizer.apply_gradients(zip(gradients1, far_net_model.trainable_variables))
            e_optimizer.apply_gradients(zip(gradients2, e_net_model.trainable_variables))
            
        print(f"Epoch {epoch} -- END")       

train(ds_train=list(zip(l_eye_train, r_eye_train, face_train, labels_train)))

Epoch 0 -- START
Gaze  --> [[-1.0143578  -3.651013    4.634084  ]
 [-1.4060545  -5.341728    3.0473123 ]
 [-1.4311694  -4.421141    4.2782106 ]
 [-2.1366584  -4.0837197   1.7377405 ]
 [-1.3256698  -4.557988    4.1628356 ]
 [-0.87268937 -4.5266414   4.2508016 ]
 [-1.3610823  -4.213584    4.0971093 ]
 [-0.6633558  -5.5245323   5.65858   ]
 [-0.8033259  -4.749507    1.9913126 ]
 [-1.2251904  -3.1424983   2.598091  ]
 [-0.9766922  -4.3088593   3.8201334 ]
 [-1.9414487  -5.8662386   2.2533581 ]
 [-1.533657   -4.475422    4.923814  ]
 [-1.3612862  -5.659019    3.0015445 ]
 [-1.335918   -5.0462093   3.5369623 ]
 [-1.1378129  -5.412487    2.2915006 ]
 [-1.2327687  -3.6165285   2.5072465 ]
 [-1.4185431  -4.350061    4.4967275 ]
 [-0.47030592 -4.4129734   3.1427414 ]
 [-1.7048144  -4.7861176   2.5276318 ]
 [-1.4899228  -4.0642996   3.9243476 ]
 [-0.932412   -2.7498722   4.0394344 ]
 [-1.6785531  -3.8855193   3.0712233 ]
 [-1.7692615  -3.7356496   4.517286  ]
 [-0.5015838  -2.990339    3.5520375 