In [42]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import dlib
# Module-level dlib models shared by get_combined_eyes() below; they are created
# once when this cell runs.
# Frontal face detector: called on a grayscale frame to obtain face rectangles.
detector = dlib.get_frontal_face_detector()
# Landmark predictor loaded from a model file. The path is relative, so it is
# resolved against the kernel's current working directory.
predictor = dlib.shape_predictor('./backend/shape_predictor_68_face_landmarks.dat')
def preprocess_eye_region(frame, eye_coords, target_size=(30, 36)):
    """
    Crops one eye out of a frame, resizes it and rescales the pixel values.

    Args:
        frame: The input image as a NumPy array indexed as frame[y, x]
            (e.g. a BGR image read by OpenCV).
        eye_coords: Iterable of (x, y) integer landmark coordinates outlining the eye.
        target_size: Output size handed to cv2.resize as (width, height); the
            returned array therefore has shape (height, width, ...).
    Returns:
        The resized eye crop as float32 with every pixel value divided by 255.
    Raises:
        ValueError: If eye_coords is empty, or if the landmark bounding box has
            no area inside the frame.
    """
    eye_coords = list(eye_coords)
    if not eye_coords:
        raise ValueError("eye_coords must contain at least one (x, y) point.")

    frame_height, frame_width = frame.shape[:2]
    xs = [x for x, _ in eye_coords]
    ys = [y for _, y in eye_coords]

    # Clamp the bounding box to the frame. Without this, a negative landmark
    # coordinate would be interpreted by NumPy as "count from the end" and
    # silently crop the wrong part of the image.
    x_min = min(max(min(xs), 0), frame_width)
    x_max = min(max(max(xs), 0), frame_width)
    y_min = min(max(min(ys), 0), frame_height)
    y_max = min(max(max(ys), 0), frame_height)

    # Cropping the eye region based on the extremities of the landmarks
    cropped_eye = frame[y_min:y_max, x_min:x_max]

    # cv2.resize fails with an opaque error on an empty image; report it clearly.
    if cropped_eye.size == 0:
        raise ValueError("Eye region is empty or lies outside the frame.")

    # Resizing the cropped eye region to the target size
    resized_eye = cv2.resize(cropped_eye, target_size)

    return resized_eye.astype(np.float32) / 255.0
def get_combined_eyes(frame):
    """
    Builds a single image holding both eye crops of the first detected face.

    Args:
        frame: The input image frame (converted with cv2.COLOR_BGR2GRAY for detection).
    Returns:
        The two preprocessed eye regions stacked horizontally, or None when the
        face detector returns no faces.
    Raises:
        ValueError: If the stacked image is not 60 columns wide.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Only the first detection is ever used; no detections means no result.
    first_face = next(iter(detector(grayscale)), None)
    if first_face is None:
        return None

    shape = predictor(grayscale, first_face)

    def landmark_points(indices):
        # Collect (x, y) tuples for the given landmark indices.
        return [(shape.part(i).x, shape.part(i).y) for i in indices]

    # Landmarks 36-41 outline one eye and 42-47 the other.
    left_points = landmark_points(range(36, 42))
    right_points = landmark_points(range(42, 48))

    # Crop and normalise each eye, then place them side by side.
    eye_crops = [
        preprocess_eye_region(frame, left_points),
        preprocess_eye_region(frame, right_points),
    ]
    side_by_side = np.hstack(eye_crops)

    # Two crops of the default width (30) must add up to 60 columns.
    if side_by_side.shape[1] != 60:
        raise ValueError("Combined eyes region does not match the expected width.")

    return side_by_side
def normalize_head_pose(head_pose_data, rotation_scale=180, translation_max_displacement=None):
    """
    Normalizes the head pose data.

    Args:
        head_pose_data: Sequence whose first three entries are the rotation vector
            and whose remaining entries are the translation vector.
        rotation_scale: Divisor applied to the rotation components
            (180 for degrees, np.pi for radians).
        translation_max_displacement: A tuple (max_x, max_y, max_z) used as per-axis
            divisors for the translation. If None, the translation vector is
            z-scored using the mean and standard deviation of its own components
            (i.e. per sample, not across a dataset).

    Returns:
        A flat list: normalized rotation followed by normalized translation.
        When the fallback z-score is used and all translation components are
        equal (standard deviation 0, e.g. an all-zero default pose), the
        translation part is returned as zeros instead of NaN.
    """
    # Normalize rotation vectors
    normalized_rotation = np.asarray(head_pose_data[:3], dtype=np.float64) / rotation_scale

    translation_vector = np.asarray(head_pose_data[3:], dtype=np.float64)

    # Normalize translation vectors. `is not None` rather than truthiness so that
    # array-like maxima do not raise "truth value is ambiguous".
    if translation_max_displacement is not None:
        max_x, max_y, max_z = translation_max_displacement
        normalized_translation = translation_vector / np.array([max_x, max_y, max_z])
    else:
        # Standard deviation normalization
        centered = translation_vector - np.mean(translation_vector)
        std_dev = np.std(translation_vector)
        if std_dev == 0:
            # All components are identical, so `centered` is already all zeros;
            # dividing by 0 would turn it into NaN.
            normalized_translation = np.zeros_like(centered)
        else:
            normalized_translation = centered / std_dev

    return np.concatenate([normalized_rotation, normalized_translation]).tolist()



In [78]:
import os
def prepare_dataset(data_file_path, screen_size, img_folder, default_head_pose=[0, 0, 0, 0, 0, 0]):
    """
    Builds eye-image inputs and normalized targets from a comma-separated data file.

    Each non-blank line is split on ',' and read as:
        field 0       image path; its last 45 characters are used as the file name
        fields 1-2    cursor x, y
        fields 3-14   eye box / pupil values, alternating x (divided by screen
                      width) and y (divided by screen height)
        fields 15-17  head rotation (optional, may carry double quotes)
        fields 18-20  head translation (optional, may carry double quotes)

    Args:
        data_file_path: Path of the data file.
        screen_size: (screen_width, screen_height) used to normalize coordinates.
        img_folder: Folder the image file names are joined onto.
        default_head_pose: Six-value head pose used for rows that do not contain
            all six head pose fields.

    Returns:
        (X, Y): X is a list of combined eye images; Y is a parallel list of rows
        [cursor_x, cursor_y] + eye box/pupil values + normalized head pose.
        Rows whose image cannot be read, or in which no face is detected, are
        skipped so X never contains None.
    """
    X, Y = [], []

    screen_width, screen_height = screen_size

    with open(data_file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue

            parts = line.split(',')
            img_name = parts[0][-45:]  # Extracting the image name (fixed-length suffix of the path)

            cursor_x, cursor_y = map(float, parts[1:3])
            eye_box_pupil_data = list(map(float, parts[3:15]))
            normalized_eye_box_pupil_data = [
                coord / screen_width if i % 2 == 0 else coord / screen_height
                for i, coord in enumerate(eye_box_pupil_data)
            ]

            # All six head pose fields (indices 15..20) must be present; a
            # shorter row would otherwise yield a truncated pose vector.
            if len(parts) >= 21:
                # Remove quotation marks and convert to floats
                rotation_data = [float(x.replace('"', '').strip()) for x in parts[15:18]]
                translation_data = [float(x.replace('"', '').strip()) for x in parts[18:21]]
                head_pose_data = rotation_data + translation_data
            else:
                # Copy so the shared default argument is never aliased.
                head_pose_data = list(default_head_pose)

            normalize_head_pose_ = normalize_head_pose(head_pose_data)

            img_path = os.path.join(img_folder, img_name)
            img = cv2.imread(img_path)
            if img is None:
                continue

            combined_eyes = get_combined_eyes(img)
            if combined_eyes is None:
                # No face detected: skip instead of storing None as a sample.
                continue

            X.append(combined_eyes)
            Y.append([cursor_x / screen_width, cursor_y / screen_height] + normalized_eye_box_pupil_data + normalize_head_pose_)

    return X, Y


In [79]:
# Usage of the function:
# Screen size as (width, height); cursor and eye coordinates are divided by these values.
screen_size = (1707, 960)
# Build the dataset from the recorded data file and its image folder.
# default_head_pose is left at its default and only applies to rows without head pose fields.
# NOTE(review): an earlier comment here said this data has no head pose information, but the
# Y[1] output recorded in this notebook shows non-zero head pose entries — confirm which is true.
X, Y = prepare_dataset('./data/Will/data.csv', screen_size, img_folder='./data/Will/images/')

In [80]:
# Number of label rows produced by prepare_dataset.
len(Y)

59

In [81]:
# Inspect one label row: [cursor_x, cursor_y] + 12 normalized eye box/pupil values
# + 6 normalized head pose values (rotation, then translation).
Y[1]

[0.9982425307557118,
 0.004166666666666667,
 0.15700058582308143,
 0.184375,
 0.14586994727592267,
 0.17708333333333334,
 0.01757469244288225,
 0.014583333333333334,
 0.19390743995313414,
 0.18541666666666667,
 0.18922085530169888,
 0.18020833333333333,
 0.018746338605741066,
 0.013541666666666667,
 -0.016105093800901846,
 -0.0004740759114885289,
 0.0026191724543021613,
 -0.7459235665832105,
 -0.6675662195088178,
 1.4134897860920284]