In [17]:
import numpy as np
import cv2
import dlib
import os
import pandas as pd
import json
from glob import glob


In [18]:
# Module-level dlib models shared by get_combined_eyes below.
# `detector` locates face rectangles in an image.
detector = dlib.get_frontal_face_detector()
# `predictor` fits facial landmarks inside a detected face rectangle. The model
# file is given as a relative path, so it must be present in the directory the
# notebook kernel is started from.
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

In [19]:
def preprocess_eye_region(frame, eye_coords, target_size=(30, 36)):
    """
    Crops the bounding box of one eye's landmarks and rescales it for the CNN model.

    Args:
        frame: The input image frame (in BGR format) as a NumPy array indexed
            [row, column, ...].
        eye_coords: Iterable of (x, y) pixel coordinates outlining the eye.
        target_size: Output size handed to cv2.resize, i.e. (width, height).
    Returns:
        The resized eye crop as float32 with every value divided by 255.
    Raises:
        ValueError: If the landmark bounding box has no area inside the frame
            (for example all landmarks share one row/column, or the eye lies
            completely outside the image).
    """
    # Materialise once so that generators are supported as well as lists.
    points = list(eye_coords)
    xs = [x for x, _ in points]
    ys = [y for _, y in points]

    frame_height, frame_width = frame.shape[:2]

    # Clamp the box to the frame. Landmarks may fall slightly outside the
    # image, and a negative start index would otherwise wrap around in NumPy
    # slicing and silently produce an empty or wrong crop.
    x_min = max(min(xs), 0)
    x_max = min(max(xs), frame_width)
    y_min = max(min(ys), 0)
    y_max = min(max(ys), frame_height)

    if x_max <= x_min or y_max <= y_min:
        raise ValueError(
            f"Eye region is empty after clamping to the frame: "
            f"x=[{x_min}, {x_max}), y=[{y_min}, {y_max})"
        )

    # Cropping the eye region based on the extremities of the landmarks
    cropped_eye = frame[y_min:y_max, x_min:x_max]

    # Resizing the cropped eye region to the target size
    resized_eye = cv2.resize(cropped_eye, target_size)

    return resized_eye.astype(np.float32) / 255.0

In [20]:

def normalize_head_pose(head_pose_data, rotation_scale=180, translation_max_displacement=None):
    """
    Normalizes the head pose data.

    Args:
        head_pose_data: Sequence whose first three entries are the rotation vector
            and whose remaining entries are the translation vector.
        rotation_scale: Maximum value for the rotation vector components
            (180 for degrees, np.pi for radians). Each rotation component is
            divided by this value.
        translation_max_displacement: A sequence (max_x, max_y, max_z) representing
            the maximum displacement in each axis; lists, tuples and NumPy arrays
            are accepted. If None (or empty), the translation vector is
            standardised with its own mean and standard deviation instead.

    Returns:
        Normalized head pose data as a flat list: rotation followed by translation.
    """
    # Normalize rotation vectors
    normalized_rotation = np.array(head_pose_data[:3]) / rotation_scale

    translation_vector = np.array(head_pose_data[3:])

    # Explicit None/length check: the truth value of a NumPy array is ambiguous,
    # so `if translation_max_displacement:` would raise for array input.
    use_max_displacement = (
        translation_max_displacement is not None
        and len(translation_max_displacement) > 0
    )

    # Normalize translation vectors
    if use_max_displacement:
        max_x, max_y, max_z = translation_max_displacement
        normalized_translation = translation_vector / np.array([max_x, max_y, max_z])
    else:
        # Standard deviation normalization. Note that the statistics are taken
        # over the components of this single vector, so the result always has
        # zero mean and unit variance and the absolute scale is discarded.
        centered = translation_vector - np.mean(translation_vector)
        std_dev = np.std(translation_vector)
        if std_dev > 0:
            normalized_translation = centered / std_dev
        else:
            # All components equal (or no components): avoid dividing by zero,
            # which would otherwise fill the output with NaN.
            normalized_translation = np.zeros_like(centered, dtype=float)

    return np.concatenate([normalized_rotation, normalized_translation]).tolist()

In [21]:
def get_combined_eyes(frame):
    """
    Builds a single side-by-side image of both eyes for the first detected face.

    Args:
        frame: The input image frame (BGR, as expected by cv2.cvtColor below).
    Returns:
        The two preprocessed eye crops stacked horizontally, or None when the
        detector reports no face.
    Raises:
        ValueError: If the stacked image is not 60 pixels wide.
    """
    def _landmark_points(shape, indices):
        # Collect (x, y) tuples for the requested landmark indices.
        return [(shape.part(i).x, shape.part(i).y) for i in indices]

    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Only the first detection is used: the loop body always returns.
    for detection in detector(grayscale):
        shape = predictor(grayscale, detection)

        # Landmarks 36-41 outline one eye, 42-47 the other.
        first_eye_points = _landmark_points(shape, range(36, 42))
        second_eye_points = _landmark_points(shape, range(42, 48))

        eye_patches = [
            preprocess_eye_region(frame, first_eye_points),
            preprocess_eye_region(frame, second_eye_points),
        ]

        # Place the two crops next to each other along the width axis.
        stacked = np.hstack(eye_patches)

        # Guard against a crop size that differs from what the model expects.
        if stacked.shape[1] != 60:
            raise ValueError("Combined eyes region does not match the expected width.")

        return stacked

    return None

In [26]:

# Assuming normalize_head_pose and get_combined_eyes are defined as before
def get_screen_size(metadata_file_path):
    """
    Reads the recording screen dimensions from a JSON metadata file.

    The dimensions are looked up under a 'screenData' object when that key is
    present, otherwise directly at the top level of the document.

    Args:
        metadata_file_path: Path of the JSON metadata file.
    Returns:
        Tuple (screen_width, screen_height) exactly as stored in the file.
    Raises:
        ValueError: If 'screenWidth' or 'screenHeight' is missing or null.
    """
    with open(metadata_file_path, 'r') as handle:
        payload = json.load(handle)

    # Support both the nested and the flat metadata layout.
    screen_info = payload['screenData'] if 'screenData' in payload else payload

    width = screen_info.get('screenWidth')
    height = screen_info.get('screenHeight')

    if any(value is None for value in (width, height)):
        raise ValueError("Screen size not found in metadata")

    return width, height


def parse_head_pose_data(row):
    """
    Parses the head pose columns of one data row into a flat list of floats.

    Args:
        row: Mapping-like row providing 'head_pose' and 'head_translation', each a
            comma-separated string of numbers, optionally wrapped in double quotes.
    Returns:
        The rotation values followed by the translation values.
    """
    def _to_floats(text):
        # Drop surrounding double quotes, then convert every comma-separated field.
        return list(map(float, text.strip('"').split(',')))

    raw_rotation = row['head_pose']
    raw_translation = row['head_translation']

    return [*_to_floats(raw_rotation), *_to_floats(raw_translation)]

def prepare_dataset(base_dir):
    """
    Builds the eye-image / target dataset from every session folder under base_dir.

    Each immediate sub-directory is treated as one recording session, except those
    whose name contains 'calibration' (case-insensitive). A session needs a
    'metadata.json' with the screen size, at least one CSV file (the first match
    is used) and at least one sub-folder containing PNG images; sessions lacking
    the CSV or the image folder are skipped with a message.

    Rows are skipped (with a message) when the image cannot be read or when no
    face is detected in it, so X never contains None entries.

    Args:
        base_dir: Directory holding one sub-directory per recording session.
    Returns:
        Tuple (X, Y) of equally long lists. X holds the combined eye images.
        Each Y row is [cursor_x / width, cursor_y / height], followed by the 12
        eye values (even positions divided by screen width, odd positions by
        screen height), followed by the 6 normalized head pose values.
    """
    X, Y = [], []

    column_names = ['image_path', 'cursor_x', 'cursor_y', 'eye_x1', 'eye_y1', 'eye_x2', 'eye_y2', 'eye_x3', 'eye_y3', 'eye_x4', 'eye_y4', 'eye_x5', 'eye_y5', 'eye_x6', 'eye_y6', 'head_pose', 'head_translation']

    for subdir in glob(os.path.join(base_dir, '*/')):
        if 'calibration' in os.path.basename(os.path.normpath(subdir)).lower():
            continue
        print(f"Processing directory: {subdir}")
        metadata_file_path = os.path.join(subdir, 'metadata.json')
        screen_width, screen_height = get_screen_size(metadata_file_path)
        print(f"Screen size: {screen_width}x{screen_height}")

        # Find any CSV file in the directory
        csv_files = glob(os.path.join(subdir, '*.csv'))
        if not csv_files:
            print(f"No data CSV file found in directory: {subdir}")
            continue
        data_file_path = csv_files[0]

        # Require at least one sub-folder that contains PNG images. The folder
        # itself is not used for loading: image paths come from the CSV.
        has_image_folder = any(
            os.path.isdir(os.path.join(subdir, entry))
            and glob(os.path.join(subdir, entry, '*.png'))
            for entry in os.listdir(subdir)
        )
        if not has_image_folder:
            print(f"No image folder found that contains images in directory: {subdir}")
            continue

        data = pd.read_csv(data_file_path, header=None, names=column_names)

        for _, row in data.iterrows():
            # Directly use the image path from the dataframe
            img_path = row['image_path']
            cursor_x, cursor_y = row['cursor_x'], row['cursor_y']
            # Columns 3..14 are the twelve eye_* values; .iloc makes the
            # positional intent explicit.
            eye_box_pupil_data = row.iloc[3:15].tolist()
            head_pose_data = parse_head_pose_data(row)

            normalized_eye_box_pupil_data = [float(coord) / screen_width if i % 2 == 0 else float(coord) / screen_height for i, coord in enumerate(eye_box_pupil_data)]
            normalized_head_pose_data = normalize_head_pose(head_pose_data)

            # Load the image
            img = cv2.imread(img_path)
            if img is None:
                print(f"Image not found: {img_path}")
                continue

            combined_eyes = get_combined_eyes(img)
            if combined_eyes is None:
                # No face was detected; appending None would corrupt X.
                print(f"No face detected, skipping: {img_path}")
                continue

            # Append to datasets
            Y.append([cursor_x / screen_width, cursor_y / screen_height] + normalized_eye_box_pupil_data + normalized_head_pose_data)
            X.append(combined_eyes)
    return X, Y

# Example usage: build the dataset from the session folders under ./data
# (a relative path, resolved against the kernel's working directory).
# X receives the combined eye images, Y the matching target rows.
base_dir = './data'
X, Y = prepare_dataset(base_dir)


Processing directory: ./data\final_boss\
Screen size: 1707x960
Processing directory: ./data\final_final\
Screen size: 1707x960
Processing directory: ./data\Hossein\
Screen size: 1536x864
Processing directory: ./data\lucy\
Screen size: 1440x900
Processing directory: ./data\melissa\
Screen size: 1710x1112
Processing directory: ./data\radabeat\
Screen size: 1707x960
Processing directory: ./data\Shaq\
Screen size: 1280x720
Processing directory: ./data\testing\
Screen size: 1707x960
Processing directory: ./data\Will\
Screen size: 1707x960
Processing directory: ./data\William\
Screen size: 1707x960
Processing directory: ./data\William1\
Screen size: 1707x960
Processing directory: ./data\WILLY\
Screen size: 1707x960


In [27]:
# Sample counts of the image list and the target list; the two should match
# because prepare_dataset appends to both together.
len(X), len(Y)

(2376, 2376)

In [21]:
# Inspect one target row: 2 normalized cursor values, then 12 normalized eye
# values, then 6 normalized head pose values (rotation followed by translation).
Y[1]

[0.011716461628588167,
 0.9791666666666666,
 0.14997070884592853,
 0.259375,
 0.1411833626244874,
 0.25729166666666664,
 0.013473930872876391,
 0.007291666666666667,
 0.18219097832454598,
 0.2625,
 0.1763327475102519,
 0.259375,
 0.014059753954305799,
 0.008333333333333333,
 -0.01723075272573134,
 -0.0007735244672992313,
 -0.003258087957893779,
 -0.8616342876924259,
 -0.5403624305397883,
 1.4019967182322142]

In [14]:
def transform_dataset(df):
    """
    Converts the per-component head pose columns into two quoted string columns.

    The six columns rotation_x/y/z and translation_x/y/z are replaced by
    'head_pose' and 'head_translation', each holding the three components joined
    by commas and wrapped in double quotes (e.g. '"0.1,0.2,0.3"'). The new
    columns take the positions of rotation_x and translation_x; all other
    columns are kept unchanged.

    The input frame is not modified, so the function can safely be called again
    on the same DataFrame.

    Args:
        df: DataFrame containing the six rotation_*/translation_* columns.
    Returns:
        A new DataFrame in the transformed layout.
    """
    # Function to convert rotation and translation columns into string format
    def convert_to_string(row):
        rotation = ','.join(map(str, [row['rotation_x'], row['rotation_y'], row['rotation_z']]))
        translation = ','.join(map(str, [row['translation_x'], row['translation_y'], row['translation_z']]))
        return f'"{rotation}"', f'"{translation}"'

    if len(df) == 0:
        # apply(axis=1) on an empty frame does not return a Series of tuples,
        # so handle the empty case explicitly.
        head_pose, head_translation = [], []
    else:
        # Apply the function to each row
        pairs = df.apply(convert_to_string, axis=1).tolist()
        head_pose = [rotation for rotation, _ in pairs]
        head_translation = [translation for _, translation in pairs]

    return (
        df
        # Drop the now-redundant columns (returns a new frame)
        .drop(columns=['rotation_y', 'rotation_z', 'translation_y', 'translation_z'])
        # Overwrite the x columns in place of position with the combined strings
        .assign(rotation_x=head_pose, translation_x=head_translation)
        # Rename columns to match the desired output format
        .rename(columns={'rotation_x': 'head_pose', 'translation_x': 'head_translation'})
    )

In [16]:
# One-off conversion of a session CSV from the per-component layout
# (separate rotation_x/y/z and translation_x/y/z columns, 21 columns in total)
# to the layout with combined 'head_pose' / 'head_translation' string columns.

# Load the original dataset; header=None because the file has no header row
df = pd.read_csv('data/William1/data.csv', header=None)

# Name the 21 columns: image path, cursor position, 12 eye values,
# then the three rotation and three translation components
df.columns = [
    'image_path', 'cursor_x', 'cursor_y',
    'eye_x1', 'eye_y1', 'eye_x2', 'eye_y2', 'eye_x3', 'eye_y3', 'eye_x4', 'eye_y4', 'eye_x5', 'eye_y5', 'eye_x6', 'eye_y6',
    'rotation_x', 'rotation_y', 'rotation_z', 'translation_x', 'translation_y', 'translation_z'
]

# Collapse the rotation/translation components into the two string columns
df_transformed = transform_dataset(df)

# Save the result to a separate file (no index, no header row), leaving the
# source CSV untouched
df_transformed.to_csv('data2.csv', index=False, header=False)
