# MobileNetV2 with Synthetic Face Data

Dataset from [Kaggle](https://www.kaggle.com/datasets/allexmendes/synthetic-gaze-and-face-segmentation/data)

This notebook trains a gaze prediction model using a dual-input CNN:
- **Input 1**: 224×224 face image (see the other notebook for preprocessing)
- **Input 2**: 4D pupil coordinate vector (`L_Pupil` and `R_Pupil`)
- **Output**: 2D normalized gaze direction vector (from `ImageEyesGazeDirection` annotation)

We use MobileNetV2 as the backbone for the visual stream and concatenate it with pupil coordinates before regression.

In [2]:
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (63.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.0/63.0 MB[0m [31m215.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.11.0.86


In [None]:
!sudo apt-get install -y cuda-compiler-12-2

In [None]:
import os
import json
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/opt/conda/nvvm/libdevice'

import tensorflow as tf

print("TF version:", tf.__version__)
print("GPUs:", tf.config.list_physical_devices('GPU'))

2025-05-26 06:46:59.016768: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-26 06:46:59.030634: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748242019.048681    6262 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748242019.054243    6262 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-26 06:46:59.072102: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

TF version: 2.18.0
GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Concatenate, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

## Load Dataset with Normalized Pupil Coordinates and Gaze Vectors

In [5]:
%pwd

'/home/sagemaker-user/gesture-gaze-extension/notebooks'

In [4]:
PROJ_DIR = '/home/sagemaker-user/gesture-gaze-extension'
DATA_DIR = os.path.join(PROJ_DIR, 'datasets')
IMG_DIR = os.path.join(DATA_DIR, 'SynthGazeProcessed/images')
JSON_DIR = os.path.join(DATA_DIR, 'SynthGazeProcessed/json')
BEST_MODEL_PATH = os.path.join(PROJ_DIR, 'models/mobilenetv2_synth_best.keras')
LAST_MODEL_PATH = os.path.join(PROJ_DIR, 'models/mobilenetv2_synth_last.keras')
TF_SAVE_MODEL_PATH = os.path.join(PROJ_DIR, 'models/tf_synth_save')
TFJS_MODEL_PATH = os.path.join(PROJ_DIR, 'models/tfjs_synth_model')
IMG_SIZE = (224, 224)
SEED = 42028

def load_dataset():
    images = []
    pupils = []
    labels = []

    for fname in sorted(os.listdir(JSON_DIR)):
        if not fname.endswith('.json'):
            continue

        json_path = os.path.join(JSON_DIR, fname)
        img_path = os.path.join(IMG_DIR, fname.replace('.json', '.png'))

        with open(json_path, 'r') as f:
            data = json.load(f)

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0

        l_pupil = data["Landmarks"]["L_Pupil"]
        r_pupil = data["Landmarks"]["R_Pupil"]

        # Normalize pupil coords
        norm_pupils = [
            l_pupil[0] / IMG_SIZE[0], l_pupil[1] / IMG_SIZE[1],
            r_pupil[0] / IMG_SIZE[0], r_pupil[1] / IMG_SIZE[1]
        ]

        gaze = data["Overall"]["ImageEyesGazeDirection"]

        images.append(img)
        pupils.append(norm_pupils)
        labels.append(gaze)

    return np.array(images), np.array(pupils), np.array(labels)


X_img, X_pupil, y = load_dataset()
X_img_train, X_img_val, X_pupil_train, X_pupil_val, y_train, y_val = train_test_split(
    X_img, X_pupil, y, test_size=0.2, random_state=SEED)

In [5]:
print(X_pupil_train.shape)  # should be (N, 4)
print(X_img_train.shape)    # should be (N, 224, 224, 3)

(3200, 4)
(3200, 224, 224, 3)


## Define Cosine Similarity Loss for Gaze Vectors

In [6]:
def cosine_loss(y_true, y_pred):
    y_true = tf.math.l2_normalize(y_true, axis=-1)
    y_pred = tf.math.l2_normalize(y_pred, axis=-1)
    return 1 - tf.reduce_sum(y_true * y_pred, axis=-1)

## Build the Dual-Input Gaze Prediction Model

In [7]:
def build_dual_input_model():
    img_input = Input(shape=(224, 224, 3), name='image_input')
    pupil_input = Input(shape=(4,), name='pupil_input')

    base_model = MobileNetV2(include_top=False, weights='imagenet', input_tensor=img_input)

    x = base_model.output
    x = GlobalAveragePooling2D()(x)

    y = Dense(32, activation='relu')(pupil_input)

    combined = Concatenate()([x, y])
    z = Dense(128, activation='relu')(combined)
    z = Dropout(0.3)(z)
    output = Dense(2, activation='linear', name='gaze_output')(z)

    model = Model(inputs=[img_input, pupil_input], outputs=output)
    return model


model = build_dual_input_model()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss=cosine_loss, metrics=['mae'])
model.summary()

  base_model = MobileNetV2(include_top=False, weights='imagenet', input_tensor=img_input)
I0000 00:00:1748242041.843106    6262 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13764 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:1e.0, compute capability: 7.5
W0000 00:00:1748242042.294328    6300 gpu_backend_lib.cc:579] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  /opt/conda/nvvm/libdevice
  ipykernel_launcher.runfiles/cuda_nvcc
  ipykern/cuda_nvcc
  
  /usr/local/cuda
  /opt/conda/lib/python3.12/site-packages/tensorflow/python/platform/../../../nvidia/cuda_nvcc
  /opt/conda/lib/python3.12/site-packages/tensorflow/python/platform/../../../../nvidia/cuda_nvcc
  /opt/conda/lib/python3.12/site-packages/tensorflow/python/platform/../../cuda
  .
You can choose the sear

## Train

In [8]:
# LR Scheduler
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,       # shrink LR by half
    patience=15,       # wait 5 epochs with no improvement
    cooldown=5,
    min_lr=1e-6,
    verbose=1
)

# Checkpoints
callbacks = [
    lr_scheduler,
    ModelCheckpoint(
        BEST_MODEL_PATH,
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1
    ),
    ModelCheckpoint(
        LAST_MODEL_PATH,
        save_best_only=False,
        save_weights_only=False,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_loss',
        patience=20,
        restore_best_weights=True
    )
]

In [9]:
history = model.fit(
    [X_img_train, X_pupil_train], y_train,
    validation_data=([X_img_val, X_pupil_val], y_val),
    epochs=100,
    batch_size=32,
    callbacks=callbacks
)
model.export(TF_SAVE_MODEL_PATH)

Epoch 1/100


I0000 00:00:1748242112.209872    6299 service.cc:148] XLA service 0x7f86400057d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748242112.209914    6299 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2025-05-26 06:48:32.822942: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1748242114.940298    6299 cuda_dnn.cc:529] Loaded cuDNN version 90800
E0000 00:00:1748242118.714407    6299 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1748242118.852490    6299 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
W0000 00:00:1748242130.180037    6299 gpu_backend_li

InternalError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/conda/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/opt/conda/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/opt/conda/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/opt/conda/lib/python3.12/asyncio/base_events.py", line 645, in run_forever

  File "/opt/conda/lib/python3.12/asyncio/base_events.py", line 1999, in _run_once

  File "/opt/conda/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/opt/conda/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3077, in run_cell

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3132, in _run_cell

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3336, in run_cell_async

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3519, in run_ast_nodes

  File "/opt/conda/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3579, in run_code

  File "/tmp/ipykernel_6262/4089823193.py", line 1, in <module>

  File "/opt/conda/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/conda/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/opt/conda/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/opt/conda/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

libdevice not found at ./libdevice.10.bc
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_33358]

In [None]:
# 1. Plot training curves
def plot_training_curves(history):
    plt.figure(figsize=(10, 4))
    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Acc')
    plt.plot(history.history['val_accuracy'], label='Val Acc')
    plt.title('Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()


plot_training_curves(history)

## Visualize Gaze Prediction on Validation Set

In [None]:
def draw_vector(img, vector, color=(0, 255, 0), scale=50):
    h, w = img.shape[:2]
    start = (w // 2, h // 2)
    end = (int(start[0] + vector[0]*scale), int(start[1] + vector[1]*scale))
    img_arrow = img.copy()
    cv2.arrowedLine(img_arrow, start, end, color, 2, tipLength=0.3)
    return img_arrow

In [None]:
best_model = tf.keras.models.load_model(BEST_MODEL_PATH)
last_model = tf.keras.models.load_model(LAST_MODEL_PATH)

In [None]:
n = 5
for i in range(n):
    img = (X_img_val[i] * 255).astype(np.uint8)
    true_vec = y_val[i]
    pred_vec = model.predict([X_img_val[i:i+1], X_pupil_val[i:i+1]])[0]

    vis_img = draw_vector(img, true_vec, color=(0, 255, 0))
    vis_img = draw_vector(vis_img, pred_vec, color=(255, 0, 0))

    plt.imshow(vis_img)
    plt.title("Green: GT, Red: Pred")
    plt.axis("off")
    plt.show()