In [11]:
import numpy as np
import cv2
import dlib
import os
import pandas as pd
import json
from glob import glob
import dlib

In [12]:
# Landmark model file; the path is relative, so it is resolved against the
# notebook's working directory.
LANDMARK_MODEL_PATH = 'shape_predictor_68_face_landmarks.dat'

# Both objects are created once here and reused by get_combined_eyes().
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

In [13]:
def preprocess_eye_region(frame, eye_coords, target_size=(40, 48)):
    """
    Crops one eye out of the frame and rescales it for the CNN model.

    The crop is the axis-aligned bounding box of ``eye_coords``, clamped to
    the frame so that landmarks lying partly outside the image cannot produce
    negative slice indices (which NumPy would interpret as "from the end").

    Args:
        frame: The input image frame (in BGR format), indexed as [row, col].
        eye_coords: Iterable of (x, y) landmark coordinates of the eye region.
        target_size: Size passed to cv2.resize, i.e. (width, height).
    Returns:
        The resized eye region as float32, with pixel values divided by 255
        (so in [0, 1] when the frame is 8-bit — assumed, not checked here).
    Raises:
        ValueError: If the clamped bounding box is empty (zero width/height
            or entirely outside the frame), or if ``eye_coords`` is empty.
    """
    frame_height, frame_width = frame.shape[:2]

    xs = [x for x, _ in eye_coords]
    ys = [y for _, y in eye_coords]

    # Clamp the box to the image. The upper bounds stay exclusive, exactly as
    # in a plain slice, so in-bounds inputs give the same crop as before.
    x_min = max(min(xs), 0)
    y_min = max(min(ys), 0)
    x_max = min(max(xs), frame_width)
    y_max = min(max(ys), frame_height)

    # An empty crop would otherwise fail deep inside cv2.resize with an
    # opaque assertion; fail early with a message that names the cause.
    if x_max <= x_min or y_max <= y_min:
        raise ValueError(
            "Eye region is empty or lies outside the frame: "
            f"x=[{x_min}, {x_max}), y=[{y_min}, {y_max})."
        )

    # Cropping the eye region based on the extremities of the landmarks
    cropped_eye = frame[y_min:y_max, x_min:x_max]

    # Resizing the cropped eye region to the target size
    resized_eye = cv2.resize(cropped_eye, target_size)

    return resized_eye.astype(np.float32) / 255.0

In [14]:
def get_combined_eyes(frame):
    """
    Detects a face in the frame and returns its two eye crops side by side.

    Faces are tried in the order the detector returns them; the first face
    whose eye regions can both be cropped is used.

    Args:
        frame: The input image frame (BGR; it is converted to grayscale for
            detection, while the crops are taken from the original frame).
    Returns:
        The two preprocessed eye regions stacked horizontally, or None if no
        face was detected or no detected face yielded usable eye crops.
    Raises:
        ValueError: If the stacked image is not 80 pixels wide.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)

    for face in faces:
        landmarks = predictor(gray, face)

        # Extract the coordinates for each eye: landmark indices 36-41 and
        # 42-47 respectively.
        left_eye = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(36, 42)]
        right_eye = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(42, 48)]

        # Preprocess each eye region. A crop can fail when the eye landmarks
        # fall on or beyond the frame edge; that is a "not detected" case for
        # this face, not a reason to abort the caller's capture loop.
        try:
            left_eye_region = preprocess_eye_region(frame, left_eye)
            right_eye_region = preprocess_eye_region(frame, right_eye)
        except (cv2.error, ValueError):
            continue

        # Combine the eyes side by side
        combined_eyes = np.hstack([left_eye_region, right_eye_region])

        # Two crops at the default target width of 40 must give 80 columns.
        if combined_eyes.shape[1] != 80:
            raise ValueError("Combined eyes region does not match the expected width.")

        return combined_eyes

    return None

In [15]:

def normalize_head_pose(head_pose_data, rotation_scale=180, translation_max_displacement=None):
    """
    Normalizes the head pose data.

    The first three entries are treated as the rotation vector and everything
    after them as the translation vector.

    Args:
        head_pose_data: Sequence containing the head pose data (rotation
            components followed by translation components).
        rotation_scale: Divisor for the rotation vector components (180 for
            degrees, np.pi for radians).
        translation_max_displacement: Sequence (max_x, max_y, max_z) giving
            the divisor for each translation axis; a tuple, list or NumPy
            array is accepted. If None or empty, the translation vector is
            standardized using its own mean and standard deviation instead.

    Returns:
        A flat list: normalized rotation followed by normalized translation.
    """
    # Normalize rotation vectors
    normalized_rotation = np.array(head_pose_data[:3]) / rotation_scale

    translation_vector = np.array(head_pose_data[3:])

    # np.size avoids the ambiguous truth value of a NumPy array while still
    # treating None and empty sequences as "not provided".
    has_max_displacement = (
        translation_max_displacement is not None
        and np.size(translation_max_displacement) > 0
    )

    # Normalize translation vectors
    if has_max_displacement:
        max_x, max_y, max_z = translation_max_displacement
        normalized_translation = translation_vector / np.array([max_x, max_y, max_z])
    else:
        # Standard deviation normalization
        std_dev = np.std(translation_vector)
        mean_val = np.mean(translation_vector)
        if std_dev == 0:
            # All components are equal, so every deviation from the mean is
            # zero; return zeros rather than the NaNs that 0/0 would give.
            normalized_translation = np.zeros_like(translation_vector, dtype=float)
        else:
            normalized_translation = (translation_vector - mean_val) / std_dev

    return np.concatenate([normalized_rotation, normalized_translation]).tolist()

In [16]:
# Size of the primary monitor in pixels, hard-coded here; pyautogui.size()
# would return the same pair dynamically.
screenWidth = 2560
screenHeight = 1440

In [17]:
# Load the saved Keras model whose predictions drive the gaze dot below.
from keras.models import load_model

GAZE_MODEL_PATH = './models/eye_gaze_v11_3600v2_linear.h5'
model = load_model(GAZE_MODEL_PATH)

In [18]:
import cv2
import numpy as np
from collections import deque

# Size of the smoothing window: how many recent gaze points are retained
# (change as needed).
n_points = 5

# Bounded FIFO of gaze points; once it holds n_points entries, appending a
# new point discards the oldest one.
gaze_points_queue = deque([], maxlen=n_points)

def moving_average(new_point, queue):
    """
    Records a new point and returns the per-coordinate mean of the queue.

    Args:
        new_point: Sequence of coordinates to add, e.g. (x, y).
        queue: Container of earlier points; it is modified in place by
            appending new_point.
    Returns:
        A list holding the average of each coordinate over all queued points.
    """
    queue.append(new_point)
    count = len(queue)

    averaged = []
    # zip(*queue) regroups the points into one tuple per coordinate axis.
    for axis_values in zip(*queue):
        averaged.append(sum(axis_values) / count)
    return averaged

# Set to True to smooth the dot with moving_average() over the points kept in
# gaze_points_queue. False (the default) draws each raw prediction.
SMOOTH_GAZE = False

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the OpenCV windows are released even
# if a frame raises (or the kernel is interrupted) part-way through the loop.
try:
    cv2.namedWindow('Gaze Tracking on Canvas', cv2.WINDOW_NORMAL)
    cv2.setWindowProperty('Gaze Tracking on Canvas', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # Black, screen-sized canvas on which the gaze dot is drawn.
    canvas = np.zeros((screenHeight, screenWidth, 3), dtype=np.uint8)

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break

        combined_eyes = get_combined_eyes(frame)

        if combined_eyes is not None:
            # Add a leading batch axis; verbose=0 stops Keras from printing a
            # progress line for every single frame.
            eyes_batch = np.expand_dims(combined_eyes, axis=0)
            predicted_gaze = model.predict(eyes_batch, verbose=0)[0]

            # Scale the prediction to screen size
            # (assumes the model outputs screen fractions — TODO confirm).
            gaze_x = predicted_gaze[0] * screenWidth
            gaze_y = predicted_gaze[1] * screenHeight

            # int() raises on NaN/inf, so skip drawing for such predictions
            # instead of aborting the whole session.
            if np.isfinite(gaze_x) and np.isfinite(gaze_y):
                if SMOOTH_GAZE:
                    # Apply moving average filter
                    gaze_x, gaze_y = moving_average((gaze_x, gaze_y), gaze_points_queue)

                # Clamp to screen size
                gaze_x_scaled = max(0, min(int(gaze_x), screenWidth - 1))
                gaze_y_scaled = max(0, min(int(gaze_y), screenHeight - 1))

                canvas.fill(0)
                cv2.circle(canvas, (gaze_x_scaled, gaze_y_scaled), 10, (0, 255, 0), -1)
                cv2.imshow('Gaze Tracking on Canvas', canvas)

        cv2.imshow('Original Webcam Feed', frame)

        # Press 'q' in an OpenCV window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


