In [None]:
import tensorflow as tf
# Enable on-demand GPU memory growth for every visible GPU, then show
# which devices were found.
gpus=tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device,enable=True)
print(gpus)
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os,glob, random
tf.__version__

<font color='red' size=4><b>Note: Because of limited resources, only the first two persons' data is processed.<br>
p00, p01 and p03 data will be used for calibration testing.</b></font>

<font color='green' size=6><b>Training Pipeline Design</b></font>

In [None]:
# Parallel lists of file paths: index i of every list belongs to the same
# sample. They are filled person by person so alignment holds by construction
# (the previous version sorted each list independently and extended
# `subject_id` by the *cumulative* number of faces, which over-counted labels
# from the second person onwards and relied on zip() truncation to hide it).
face,lefteye,righteye,rotation_matrix,gaze,subject_id,eye_coords=[],[],[],[],[],[],[]
# Maps a subject directory to the integer id looked up later in the pipeline.
subject_map={}
data_path='processed_data/Image'
persons=sorted(os.listdir(data_path))
print(persons)
# Sub-directories read for every person; presumably files with the same sorted
# position in each directory describe the same sample -- TODO confirm naming.
modalities=('face','lefteye','righteye','rotation_matrix','eye_coords','3d_gaze')
for person_index,person in enumerate(persons[:2]):
    person_dir=f'{data_path}/{person}'
    person_files={name:sorted(glob.glob(f'{person_dir}/{name}/*')) for name in modalities}
    file_counts={name:len(paths) for name,paths in person_files.items()}
    # A count mismatch would silently pair files from different samples.
    if len(set(file_counts.values()))!=1:
        raise ValueError(f'{person_dir}: per-modality file counts differ: {file_counts}')
    face+=person_files['face']
    lefteye+=person_files['lefteye']
    righteye+=person_files['righteye']
    rotation_matrix+=person_files['rotation_matrix']
    eye_coords+=person_files['eye_coords']
    gaze+=person_files['3d_gaze']
    # Exactly one subject label per sample of *this* person.
    subject_id+=[person_dir]*file_counts['face']
    subject_map[person_dir]=person_index
print(len(lefteye))
# Tuple order must match the positional parameters of map_fn.
samples=list(zip(face,lefteye,righteye,rotation_matrix,eye_coords,subject_id,gaze))
# Fixed seed: the shuffle (and therefore any later take/skip split) is reproducible.
random.seed(12)
random.shuffle(samples)
data=tf.data.experimental.from_list(samples)
print(subject_map)

### TensorFlow StaticHashTable mapping subjects to the ids passed to the embedding layer (usable inside the tf.data pipeline)

In [3]:
# Turn the Python dict {subject directory: id} into a TensorFlow lookup table.
# Unknown keys resolve to -1.
# The isinstance guard keeps this cell re-runnable: `subject_map` is rebound
# from dict to table here, and a second run would otherwise fail on `.keys()`.
if isinstance(subject_map,dict):
    subject_map=tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(list(subject_map.keys()),
                                            list(subject_map.values())),
        default_value=-1)

In [4]:
@tf.function
def load_img(img):
    """Read the file at path `img` and decode it as a 3-channel JPEG tensor."""
    raw_bytes=tf.io.read_file(img)
    return tf.io.decode_jpeg(raw_bytes,channels=3)
@tf.numpy_function(Tout=tf.float32)
def ld(x):
    """Load the NumPy file at path `x` and return it as a flat float32 array."""
    loaded=np.load(x)
    as_float32=loaded.astype('float32')
    return as_float32.ravel()
@tf.function
def map_fn(face,lefteye,righteye,rotation_matrix,eye_coords,subject_id,gaze):
    """Turn one tuple of file paths into a (features, label) training pair.

    The images are decoded with `load_img`, the array files are loaded with
    `ld`, and `subject_id` is translated to its integer id via `subject_map`.
    Returns a dict of model inputs together with the gaze target.
    """
    face_image=load_img(face)
    features={
        'face':face_image,
        # horizontally mirrored copy of the face image
        'flipped_face':tf.image.flip_left_right(face_image),
        'lefteye':load_img(lefteye),
        'righteye':load_img(righteye),
        'rotation_matrix':ld(rotation_matrix),
        'eye_coords':ld(eye_coords),
        'id':subject_map[subject_id],
    }
    label=ld(gaze)
    return features,label


In [None]:
# Sanity check: run the pipeline once and display a single batch of 5 samples.
preview_batches=data.map(map_fn).batch(5)
next(iter(preview_batches))

### Pretrained CNN backbones used to extract features from the face and eye images: EfficientNetV2 (face) and VGG16 (eyes)

In [6]:
# Start from a clean Keras state so re-running this cell does not pile up layers.
tf.keras.backend.clear_session()

# EfficientNetV2-B0 without its classification head; input preprocessing is
# built into the network, and no global pooling is applied to its output.
effcnt_net=tf.keras.applications.EfficientNetV2B0(
    include_top=False,
    include_preprocessing=True,
    pooling=None,
)
effcnt_net.trainable=True

# VGG16 without its classification head. It has no built-in preprocessing,
# so its preprocess_input function is kept alongside it.
vgg16=tf.keras.applications.VGG16(
    include_top=False,
    pooling=None,
)
vgg16.trainable=True
vgg16_processor=tf.keras.applications.vgg16.preprocess_input

### g_face and g_eye

In [7]:
# Wrap each backbone in a named functional model: g_face reuses the
# EfficientNetV2 graph, g_eye reuses the VGG16 graph.
g_face=tf.keras.Model(
    inputs=effcnt_net.inputs,
    outputs=effcnt_net.outputs,
    name='g_face',
)
g_eye=tf.keras.Model(
    inputs=vgg16.inputs,
    outputs=vgg16.outputs,
    name='g_eye',
)

### GazeModel implementation

In [8]:
class GazeModel(tf.keras.Model):
    """Gaze regressor combining face, eye, geometry and per-subject features.

    The face and its mirrored copy go through the shared `g_face` backbone,
    both eye crops go through the shared `g_eye` backbone, and the flattened
    features are concatenated with the eye coordinates, the subject embedding
    and the rotation matrix before being fed to an MLP with 3 outputs.

    Args:
        num_subjects: number of rows in the subject embedding table
            (defaults to the previously hard-coded 3).
        embedding_dim: width of each subject embedding (defaults to 6).
    """

    def __init__(self,num_subjects=3,embedding_dim=6):
        super(GazeModel,self).__init__()
        self.g_face=g_face
        self.g_eye=g_eye
        self.flat=tf.keras.layers.Flatten()
        # Embedding layer as described in the paper.
        # mask_zero is disabled: with masking, index 0 is reserved for padding
        # and cannot be a real vocabulary entry, but subject ids start at 0.
        self.embedding=tf.keras.layers.Embedding(num_subjects,embedding_dim,
                                                 embeddings_regularizer=tf.keras.regularizers.L2(l2=0.01),
                                                 mask_zero=False,name='subject_embedding')
        # The embedding stays frozen at first; the training loop unfreezes it
        # after 40 epochs.
        self.embedding.trainable=False
        # MLP that receives the concatenated features.
        self.MLP=tf.keras.Sequential([
            tf.keras.layers.Dense(1280,activation='relu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(3,name='gaze_location'),
            ],name='MLP')

    def _eye_features(self,eye_image):
        """Return the flattened g_eye features for one batch of eye crops."""
        # The decoded JPEGs are uint8, while VGG16's preprocess_input is
        # documented for floating-point pixels in [0, 255]; cast first so the
        # mean subtraction is not done in unsigned 8-bit arithmetic.
        # NOTE(review): this changes the values fed to g_eye, so weights saved
        # before this fix will not reproduce their earlier metrics.
        eye_image=tf.cast(eye_image,tf.float32)
        return self.flat(self.g_eye(vgg16_processor(eye_image)))

    def call(self,input_dict):
        """Predict the 3-component gaze output for a batch.

        Args:
            input_dict: dict with keys 'face', 'flipped_face', 'lefteye',
                'righteye', 'rotation_matrix', 'eye_coords' and 'id'.
                The non-image entries are assumed to be rank-2
                (batch, features) and 'id' rank-1 -- TODO confirm.

        Returns:
            Tensor produced by the MLP head (last dimension 3).
        """
        # Face and mirrored-face features share the g_face backbone
        # (its preprocessing is built into the network).
        face_features=self.flat(self.g_face(input_dict['face']))
        flipped_face_features=self.flat(self.g_face(input_dict['flipped_face']))
        # Left/right eye features share the g_eye backbone.
        left_features=self._eye_features(input_dict['lefteye'])
        right_features=self._eye_features(input_dict['righteye'])
        # Subject-specific (person-specific) embedding.
        embedding=self.embedding(input_dict['id'])
        # Rotation matrix and 3d eye coordinates are used as-is.
        rot_mat=input_dict['rotation_matrix']
        eye_coords=input_dict['eye_coords']
        # Concatenate everything along the feature axis and regress the gaze.
        total=tf.concat([face_features,flipped_face_features,left_features,
                            right_features,eye_coords,embedding,rot_mat],1)
        return self.MLP(total)


### loss function for GazeModel as described in paper

In [9]:
# Huber loss with a threshold (delta) of 1.5.
loss_fn = tf.keras.losses.Huber(delta=1.5)

### Optimizer configuration for the first 40 epochs

In [10]:
# Adam with a constant learning rate; this optimizer is the one passed to the
# first model.compile call.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0003,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-7,
)

### Optimizer configuration after epoch 40

In [11]:
# Adam whose learning rate follows a cosine decay from 1e-4 down to 0
# (alpha=0.0) over 10000 steps, with no warm-up phase.
optimizer_post_40_epoch = tf.keras.optimizers.Adam(
    learning_rate=tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=0.0001,
        decay_steps=10000,
        alpha=0.0,
        name='CosineDecay',
        warmup_target=None,
        warmup_steps=0,
    ),
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-7,
)


### AngularError metric implementation

In [13]:
class AngularError(tf.keras.metrics.Metric):
    """Mean angle, in degrees, between predicted and true direction vectors.

    Both inputs are L2-normalised along the last axis, so only their
    directions matter. The metric keeps a running sum of the per-vector
    angles and a running count, and reports sum / count.
    """

    # Exact radians-to-degrees factor (previously the rounded literal 57.296).
    _DEGREES_PER_RADIAN=180.0/np.pi

    def __init__(self, name='mean_angular_error', **kwargs):
        super().__init__(name=name, **kwargs)
        self.total_error = self.add_weight(name='total_error', initializer='zeros')
        self.num_samples = self.add_weight(name='num_samples', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the angular errors of one batch.

        Args:
            y_true: ground-truth vectors, components on the last axis.
            y_pred: predicted vectors, same shape as `y_true`.
            sample_weight: accepted for Keras API compatibility; ignored.
        """
        y_true = tf.math.l2_normalize(tf.cast(y_true, tf.float32), axis=-1)
        y_pred = tf.math.l2_normalize(tf.cast(y_pred, tf.float32), axis=-1)

        # Cosine of the angle between the unit vectors; clipped because
        # rounding can push it slightly outside acos' domain [-1, 1].
        cosine = tf.reduce_sum(y_true * y_pred, axis=-1)
        cosine = tf.clip_by_value(cosine, -1.0, 1.0)

        angular_error = tf.acos(cosine) * self._DEGREES_PER_RADIAN
        self.total_error.assign_add(tf.reduce_sum(angular_error))
        # Count every vector that contributed an angle, whatever the input rank.
        self.num_samples.assign_add(tf.cast(tf.size(angular_error), tf.float32))

    def result(self):
        """Return the mean angular error in degrees (0 if nothing was seen)."""
        # divide_no_nan avoids a NaN from 0/0 before the first update.
        return tf.math.divide_no_nan(self.total_error, self.num_samples)

    def reset_state(self):
        """Clear the running sum and count."""
        self.total_error.assign(0.0)
        self.num_samples.assign(0.0)


In [None]:
# Instantiate the metric and the model, then compile with the phase-one optimizer.
angular_error=AngularError()
model=GazeModel()
model.compile(loss=loss_fn,optimizer=optimizer,metrics=[angular_error])

# Run one forward pass on a single sample (features only, label dropped)
# before asking for the summary.
first_inputs=next(iter(data.map(map_fn).batch(1).map(lambda features,label:features)))
print(model(first_inputs))
model.summary()

### Train Test split

In [15]:
# 80/20 split of the already-shuffled dataset. The boundary is computed once
# as an explicit int: take()/skip() expect an integer count, and
# cardinality*0.8 is a float.
train_size=int(data.cardinality().numpy()*0.8)
train_data=data.take(train_size)
test_data=data.skip(train_size)

### Training

In [None]:
# `best_acc` actually tracks the lowest validation angular error seen so far
# (lower is better); `patience` counts the remaining epochs without improvement.
best_acc=99999
patience=20
for epoch in range(200):
    print('training....')
    train_batches=(train_data
                   .map(map_fn,num_parallel_calls=tf.data.AUTOTUNE)
                   .batch(16,num_parallel_calls=tf.data.AUTOTUNE)
                   .prefetch(2))
    model.fit(train_batches,epochs=1)

    print('validation....')
    logs=model.evaluate(test_data.map(map_fn).batch(8))
    # logs is [loss, angular error]; the metric is the early-stopping criterion.
    val_error=logs[1]

    # The patience countdown only runs once epoch 40 has passed.
    if epoch>40:
        patience-=1
    if val_error<best_acc:
        patience=20
        best_acc=val_error
        model.save_weights('best_GazeModel.h5')

    if epoch==40:
        # After epoch 40 the subject embedding becomes trainable; recompile so
        # the change takes effect and switch to the cosine-decay optimizer.
        model.get_layer('subject_embedding').trainable=True
        model.compile(loss=loss_fn,optimizer=optimizer_post_40_epoch,metrics=[angular_error])
        model.summary()

    if patience==0:
        break

In [None]:
# Restore the checkpoint with the lowest validation error and re-evaluate it
# on the held-out split.
model.load_weights('best_GazeModel.h5')
held_out_batches=test_data.map(map_fn).batch(8)
model.evaluate(held_out_batches)