In [4]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import json
import dlib
# Module-level dlib models, created once and shared by get_combined_eyes().
# `detector` finds frontal faces in an image.
detector = dlib.get_frontal_face_detector()
# `predictor` locates facial landmarks inside a detected face rectangle. The model
# file is resolved relative to the current working directory, so it must be present
# there before this cell is run.
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

In [5]:
def preprocess_eye_region(frame, eye_coords, target_size=(30, 36)):
    """
    Crops one eye out of the frame, resizes it and scales it for the CNN model.

    The crop is the axis-aligned bounding box of ``eye_coords``, clamped to the
    frame so that landmarks lying outside the image (e.g. negative coordinates
    for a face that is partly out of view) cannot be misread by NumPy as
    "from the end" indices and produce an empty or wrong crop.

    Args:
        frame: The input image frame (in BGR format), indexed as frame[row, col].
        eye_coords: Sequence of (x, y) landmark coordinates outlining the eye.
        target_size: Size handed to cv2.resize, i.e. (width, height). The
            default therefore yields a crop that is 36 rows by 30 columns.
    Returns:
        The resized eye region as float32, divided by 255 (assumes 8-bit pixel
        values so the result lies in [0, 1]).
    Raises:
        ValueError: If no coordinates are given, or if the bounding box has no
            area inside the frame.
    """
    points = list(eye_coords)
    if not points:
        raise ValueError("eye_coords must contain at least one (x, y) point.")

    xs = [x for x, _ in points]
    ys = [y for _, y in points]

    # Clamp the bounding box to the image so slicing never sees negative indices.
    frame_height, frame_width = frame.shape[:2]
    x_min = max(min(xs), 0)
    x_max = min(max(xs), frame_width)
    y_min = max(min(ys), 0)
    y_max = min(max(ys), frame_height)

    # An empty crop would make cv2.resize fail with an opaque assertion error.
    if x_max <= x_min or y_max <= y_min:
        raise ValueError(
            f"Eye region is empty after clamping to the frame: "
            f"x=[{x_min}, {x_max}), y=[{y_min}, {y_max})."
        )

    # Cropping the eye region based on the extremities of the landmarks
    cropped_eye = frame[y_min:y_max, x_min:x_max]

    # Resizing the cropped eye region to the target size
    resized_eye = cv2.resize(cropped_eye, target_size)

    return resized_eye.astype(np.float32) / 255.0

In [6]:
def get_combined_eyes(frame):
    """
    Builds one side-by-side image of both eyes for the first detected face.

    Args:
        frame: The input image frame (converted from BGR to grayscale for
            face and landmark detection; the crops are taken from the
            original frame).
    Returns:
        The two preprocessed eye crops stacked horizontally, or None when the
        detector reports no face.
    Raises:
        ValueError: If the stacked image is not 60 columns wide.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Only the first detection is ever used; bail out early when there is none.
    first_face = next(iter(detector(gray)), None)
    if first_face is None:
        return None

    shape = predictor(gray, first_face)

    def _landmark_points(start, stop):
        # (x, y) tuples for landmark indices start..stop-1
        return [(shape.part(idx).x, shape.part(idx).y) for idx in range(start, stop)]

    # Landmarks 36-41 and 42-47 outline the two eyes.
    left_points = _landmark_points(36, 42)
    right_points = _landmark_points(42, 48)

    left_crop = preprocess_eye_region(frame, left_points)
    right_crop = preprocess_eye_region(frame, right_points)

    # Place the two crops next to each other (left crop first).
    combined_eyes = np.hstack((left_crop, right_crop))

    # Two default-sized crops (30 columns each) must add up to 60 columns.
    if combined_eyes.shape[1] != 60:
        raise ValueError("Combined eyes region does not match the expected width.")

    return combined_eyes

In [7]:

def normalize_head_pose(head_pose_data, rotation_scale=180, translation_max_displacement=None):
    """
    Normalizes the head pose data.

    The first three values are treated as the rotation vector and divided by
    ``rotation_scale``; the remaining values are treated as the translation
    vector and are either divided by per-axis maxima or z-scored.

    Args:
        head_pose_data: Sequence containing the head pose data (rotation values
            followed by translation values).
        rotation_scale: Maximum value for the rotation vector components
            (180 for degrees, np.pi for radians).
        translation_max_displacement: A sequence (max_x, max_y, max_z) with the
            maximum displacement in each axis. Tuples, lists and NumPy arrays
            are accepted. If None (or empty), the translation vector is
            standardized with its own mean and standard deviation.

    Returns:
        Normalized head pose data as a flat list of floats (rotation first,
        then translation).
    """
    pose = np.asarray(head_pose_data, dtype=float)

    # Normalize rotation vectors
    normalized_rotation = pose[:3] / rotation_scale

    # Normalize translation vectors
    translation_vector = pose[3:]

    # Explicit None/length test: plain truthiness raises for NumPy arrays.
    use_max_displacement = (
        translation_max_displacement is not None
        and len(translation_max_displacement) > 0
    )

    if use_max_displacement:
        max_x, max_y, max_z = translation_max_displacement
        normalized_translation = translation_vector / np.array([max_x, max_y, max_z], dtype=float)
    else:
        # Standard deviation normalization
        std_dev = np.std(translation_vector)
        centered = translation_vector - np.mean(translation_vector)
        if std_dev > 0:
            normalized_translation = centered / std_dev
        else:
            # All components are identical: dividing by a zero spread would
            # yield NaN targets, so report "no deviation" as zeros instead.
            normalized_translation = np.zeros_like(translation_vector)

    return np.concatenate([normalized_rotation, normalized_translation]).tolist()


In [None]:
import os
from glob import glob
import pandas as pd
# Assuming normalize_head_pose and get_combined_eyes are defined as before
def get_screen_size(metadata_file_path):
    """
    Reads the screen dimensions from a JSON metadata file.

    The dimensions are looked up under the 'screenData' key when it exists,
    otherwise directly at the top level of the document.

    Args:
        metadata_file_path: Path of the JSON metadata file.
    Returns:
        A (screen_width, screen_height) tuple with the values stored under
        'screenWidth' and 'screenHeight'.
    Raises:
        ValueError: If either dimension is missing.
    """
    with open(metadata_file_path, 'r') as handle:
        payload = json.load(handle)

    # Prefer the nested section; fall back to the document root.
    screen_info = payload['screenData'] if 'screenData' in payload else payload

    width = screen_info.get('screenWidth')
    height = screen_info.get('screenHeight')

    if width is None or height is None:
        raise ValueError("Screen size not found in metadata")

    return width, height


def parse_head_pose_data(row):
    """
    Turns the two comma-separated head-pose strings of a row into one list.

    Args:
        row: Mapping-like record with 'head_pose' and 'head_translation'
            entries, each a comma-separated string of numbers that may be
            wrapped in double quotes.
    Returns:
        The rotation values followed by the translation values, as floats.
    """
    raw_rotation = row['head_pose']
    raw_translation = row['head_translation']

    def _to_floats(text):
        # Drop surrounding double quotes, then parse each comma-separated field.
        return list(map(float, text.strip('"').split(',')))

    rotation = _to_floats(raw_rotation)
    translation = _to_floats(raw_translation)
    return rotation + translation

def prepare_dataset(base_dir):
    """
    Builds the training samples from every session folder under ``base_dir``.

    Each immediate sub-directory whose name does not contain 'calibration' is
    expected to hold a 'metadata.json' (screen size), one non-calibration CSV
    file and at least one sub-folder with PNG images. Directories missing any
    of these are reported and skipped.

    Sub-directories and CSV files are visited in sorted order: glob() returns
    entries in filesystem order, which would otherwise make the sample order
    (and any seeded train/test split built on it) differ between machines.

    Args:
        base_dir: Root directory that contains the session folders.
    Returns:
        A tuple (X, Y) of equally long lists:
            X: combined-eye image per row, or None when no face was detected
               or eye extraction failed for that image.
            Y: per row, [cursor_x / screen_width, cursor_y / screen_height],
               followed by the 12 eye values (CSV columns 3-14, alternately
               divided by screen width and height), followed by the
               normalized head pose values.
    """
    X, Y = [], []

    column_names = ['image_path', 'cursor_x', 'cursor_y', 'left_pup', 'eye_y1', 'eye_x2', 'eye_y2', 'eye_x3', 'eye_y3', 'eye_x4', 'eye_y4', 'eye_x5', 'eye_y5', 'eye_x6', 'eye_y6', 'head_pose', 'head_translation']

    for subdir in sorted(glob(os.path.join(base_dir, '*/'))):
        if 'calibration' in os.path.basename(os.path.normpath(subdir)).lower():
            continue
        print(f"Processing directory: {subdir}")

        metadata_file_path = os.path.join(subdir, 'metadata.json')
        if not os.path.isfile(metadata_file_path):
            # Same policy as the other missing-input checks: skip, do not abort.
            print(f"No metadata.json found in directory: {subdir}")
            continue
        screen_width, screen_height = get_screen_size(metadata_file_path)
        print(f"Screen size: {screen_width}x{screen_height}")

        # Find the data CSV (ignoring calibration files); sorted so that the
        # choice of csv_files[0] is deterministic.
        csv_files = sorted(
            f for f in glob(os.path.join(subdir, '*.csv'))
            if 'calibration' not in os.path.basename(f).lower()
        )
        if not csv_files:
            print(f"No data CSV file found in directory: {subdir}")
            continue
        data_file_path = csv_files[0]

        # Require at least one sub-folder that contains PNG images.
        has_image_folder = any(
            os.path.isdir(os.path.join(subdir, d)) and glob(os.path.join(subdir, d, '*.png'))
            for d in os.listdir(subdir)
        )
        if not has_image_folder:
            print(f"No image folder found that contains images in directory: {subdir}")
            continue

        data = pd.read_csv(data_file_path, header=None, names=column_names)

        for _, row in data.iterrows():
            # The image path stored in the CSV is used as-is.
            img_path = row['image_path']
            cursor_x, cursor_y = row['cursor_x'], row['cursor_y']
            # Positional slice: the 12 eye columns between cursor_y and head_pose.
            eye_box_pupil_data = row.iloc[3:15].tolist()
            head_pose_data = parse_head_pose_data(row)

            # Even positions are divided by screen width, odd ones by screen height.
            normalized_eye_box_pupil_data = [float(coord) / screen_width if i % 2 == 0 else float(coord) / screen_height for i, coord in enumerate(eye_box_pupil_data)]
            normalized_head_pose_data = normalize_head_pose(head_pose_data)

            # Load the image
            img = cv2.imread(img_path)
            if img is None:
                print(f"Image not found: {img_path}")
                continue

            # One bad frame must not abort the whole run; record it as None so
            # X and Y stay aligned and callers can filter it out.
            try:
                combined_eyes = get_combined_eyes(img)
            except (ValueError, cv2.error) as exc:
                print(f"Eye extraction failed for {img_path}: {exc}")
                combined_eyes = None

            # Append to datasets
            Y.append([cursor_x / screen_width, cursor_y / screen_height] + normalized_eye_box_pupil_data + normalized_head_pose_data)
            X.append(combined_eyes)
    return X, Y



In [None]:
# Example usage:
# Build the dataset from every non-calibration session folder under ./data
# (path is relative to the current working directory).
# X holds the combined-eye images (None where no face was detected) and
# Y the matching normalized target rows.
base_dir = './data'
X, Y = prepare_dataset(base_dir)

In [13]:

# Mark the samples for which an eye image was actually produced
# (prepare_dataset stores None when no face was detected).
has_eyes = [isinstance(img, np.ndarray) for img in X]

# Keep images and targets of the marked samples only, so both arrays stay aligned.
X_filtered = np.array([img for img, keep in zip(X, has_eyes) if keep])

Y_filtered = np.array([target for target, keep in zip(Y, has_eyes) if keep])

In [18]:
# Sanity check: inputs and targets must contain the same number of samples.
len(X_filtered), len(Y_filtered)

(408, 408)

In [17]:
# Keep only the first 14 target columns: the 2 normalized cursor coordinates plus
# the 12 normalized eye values. The head-pose values that prepare_dataset appends
# after them are dropped, matching the 14-unit output layer of the model.
Y_filtered = Y_filtered[:, :14]
Y_filtered.shape


(408, 14)

In [19]:

# Hold out 20% of the samples for testing; the fixed random_state makes the split
# repeatable for a given sample order.
X_train, X_test, Y_train, Y_test = train_test_split(X_filtered, Y_filtered, test_size=0.2, random_state=42)

In [None]:


from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense, MaxPool2D
from keras.metrics import MeanSquaredError, MeanAbsoluteError

# Baseline CNN: three conv + max-pool stages, a 64-unit dense layer and a linear
# 14-value output, taking 36x60 three-channel inputs.
# NOTE: the next cell assigns a new Sequential to `model`, so when the cells are run
# in order this baseline is replaced before training.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(36, 60, 3)), 
    MaxPool2D(),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(),

    Flatten(),
    Dense(64, activation='relu'),
    Dense(14) 
])

# Compile the model: MSE loss, with MSE and MAE also reported as metrics
model.compile(optimizer='adam', loss='mse', metrics=[MeanSquaredError(), MeanAbsoluteError()])

In [20]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout

# Define the model: three conv/pool/dropout stages followed by a small dense head
model = Sequential([
    # Stage 1: 32 filters on the 36x60 three-channel input
    Conv2D(32, (3, 3), activation='relu', input_shape=(36, 60, 3)),
    MaxPool2D(),
    Dropout(0.25),

    # Stage 2: 64 filters
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(),
    Dropout(0.25),

    # Stage 3: 128 filters, with a higher dropout rate for the deepest stage
    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(),
    Dropout(0.4),

    # Flatten the feature maps before the dense layers
    Flatten(),

    # Dense head with dropout right before the output layer
    Dense(64, activation='relu'),
    Dropout(0.5),

    # 14 outputs, each squashed into (0, 1) by the sigmoid; adjust the count as needed
    Dense(14, activation='sigmoid'),
])

# Compile the model: MSE loss, with MSE and MAE also reported as metrics
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mean_squared_error', 'mean_absolute_error'],
)

2024-01-20 10:14:32.986886: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-20 10:14:35.123923: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-20 10:14:35.124083: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-20 10:14:35.427002: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-20 10:14:36.127725: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-20 10:14:36.128201: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [21]:
# Train for 100 epochs in batches of 32, using 10% of the training split for validation.
model.fit(X_train, Y_train, epochs=100, validation_split=0.1, batch_size=32)

Epoch 1/100

2024-01-20 10:14:41.729937: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 22861440 exceeds 10% of free system memory.
2024-01-20 10:14:41.803430: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 22861440 exceeds 10% of free system memory.
2024-01-20 10:14:41.896552: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 22861440 exceeds 10% of free system memory.




2024-01-20 10:14:41.954261: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 22861440 exceeds 10% of free system memory.
2024-01-20 10:14:42.017595: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 22861440 exceeds 10% of free system memory.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x7f56642c00a0>

In [22]:
# Evaluate the model on the held-out test split. The result lists the loss followed
# by the compiled metrics (mean squared error, mean absolute error); the loss is
# itself MSE, so the first two values coincide.
model.evaluate(X_test, Y_test)



[0.010376979596912861, 0.010376979596912861, 0.048071350902318954]

In [23]:
# Compute predictions for the test split (stored in `predictions`, not displayed),
# then save the trained model to the .h5 path below, which is relative to the
# current working directory.
predictions = model.predict(X_test)
model.save('./models/eye_gaze_v9_1810_drop_out.2.h5')



  saving_api.save_model(
