# Preprocessing

In [1]:
!pip install tensorflow==2.18.1 protobuf==4.25.3 mediapipe==0.10.21



In [10]:
import os
import cv2
import re
import shutil
import random
import numpy as np
from glob import glob
from tqdm import tqdm
from collections import defaultdict
import mediapipe as mp

# Seed Python's global `random` generator; the split cell relies on it via
# random.shuffle when assigning images to train/val/test.
random.seed(42028)

# RAW_DATA_DIR: root of the raw dataset that is globbed for "*/*.jpg".
# OUTPUT_DIR: root under which <split>/<label>/ folders of cropped images are written.
RAW_DATA_DIR = "/home/sagemaker-user/gesture-gaze-extension/datasets/Columbia Gaze Data Set"
OUTPUT_DIR = "/home/sagemaker-user/gesture-gaze-extension/datasets/ColumbiaGazeProcessed"
# Split names and the fraction of each label's images assigned to train / val;
# the split cell gives 'test' whatever remains after those two.
# NOTE(review): SPLITS is not referenced by any of the visible cells.
SPLITS = ['train', 'val', 'test']
SPLIT_RATIOS = {'train': 0.7, 'val': 0.15, 'test': 0.15}

# MediaPipe setup
# One FaceMesh instance is created here and used by the processing cell, which
# also closes it when it finishes. Presumably this cell has to be re-run before
# the processing cell can be run again -- TODO confirm.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, refine_landmarks=True, max_num_faces=1)

def extract_gaze_from_filename(filename):
    """Read the vertical and horizontal gaze values encoded in a filename.

    The filename is searched for a ``<n>P_<n>V_<n>H`` token, where each ``<n>``
    is an optionally negative integer. The number in front of ``P`` is part of
    the pattern but is not returned.

    Args:
        filename: File name (or any string) to search.

    Returns:
        A ``(v, h)`` tuple of ints taken from the ``V`` and ``H`` fields, or
        ``(None, None)`` when the pattern does not occur in ``filename``.
    """
    found = re.search(r'(-?\d+)P_(-?\d+)V_(-?\d+)H', filename)
    if found is None:
        return None, None
    # Groups 2 and 3 are the V and H numbers; group 1 (P) is ignored.
    vertical, horizontal = (int(found.group(idx)) for idx in (2, 3))
    return vertical, horizontal

def classify_gaze(v, h):
    """Map a (vertical, horizontal) gaze pair to one of nine direction labels.

    The label is determined only by the signs of the two values: positive ``v``
    is 'up', negative ``v`` is 'down', negative ``h`` is 'left', positive ``h``
    is 'right', and both zero is 'center'. Diagonals are joined with an
    underscore (e.g. 'up_left').

    Args:
        v: Vertical gaze value, or None if it could not be parsed.
        h: Horizontal gaze value, or None if it could not be parsed.

    Returns:
        The label string, or None when either value is missing or no branch
        applies.
    """
    # extract_gaze_from_filename yields (None, None) for unparseable names;
    # without this guard the `v > 0` comparison below raises TypeError.
    if v is None or h is None:
        return None

    if v == 0 and h == 0:
        return 'center'
    elif v == 0 and h < 0:
        return 'left'
    elif v == 0 and h > 0:
        return 'right'
    elif v > 0 and h == 0:
        return 'up'
    elif v < 0 and h == 0:
        return 'down'
    elif v > 0 and h < 0:
        return 'up_left'
    elif v > 0 and h > 0:
        return 'up_right'
    elif v < 0 and h < 0:
        return 'down_left'
    elif v < 0 and h > 0:
        return 'down_right'
    return None

def crop_eye_region(image, landmarks, margin=100):
    """Crop the area around both eyes, padded by ``margin`` pixels on each side.

    Args:
        image: Image array indexed as ``[row, col, ...]``; only its first two
            dimensions (height, width) are used.
        landmarks: Indexable sequence whose items expose ``.x`` and ``.y``;
            these are multiplied by the image width/height to get pixels.
        margin: Padding in pixels added around the eye bounding box before
            clamping to the image borders. Defaults to 100.

    Returns:
        The slice ``image[top:bottom, left:right]`` (a view, not a copy).
    """
    img_h, img_w = image.shape[:2]

    LEFT_EYE_LANDMARKS = [33, 133, 160, 159, 158, 157, 173, 246]
    RIGHT_EYE_LANDMARKS = [362, 263, 387, 386, 385, 384, 398, 466]

    # Extract (x, y) pixel coordinates for every eye landmark.
    eye_points = np.array([
        [int(landmarks[idx].x * img_w), int(landmarks[idx].y * img_h)]
        for idx in LEFT_EYE_LANDMARKS + RIGHT_EYE_LANDMARKS
    ])

    min_x, min_y = (int(c) for c in eye_points.min(axis=0))
    max_x, max_y = (int(c) for c in eye_points.max(axis=0))

    # All four edges are derived from the *unclamped* bounding box. Computing
    # the far edge from an already-clamped near edge would push it 2 * margin
    # past the eyes whenever the near side is cut off by the image border.
    left = max(0, min_x - margin)
    top = max(0, min_y - margin)
    # +1 because max_x / max_y are inclusive while slice ends are exclusive.
    right = min(img_w, max_x + 1 + margin)
    bottom = min(img_h, max_y + 1 + margin)

    return image[top:bottom, left:right]

W0000 00:00:1747973161.455395   24894 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747973161.473140   24893 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [None]:
# Gather the raw images and group their paths by gaze label.
# glob's result order depends on the filesystem, so sort the paths to make the
# seeded shuffle below produce the same split on every machine.
image_paths = sorted(glob(os.path.join(RAW_DATA_DIR, "*/*.jpg")))
labeled_data = defaultdict(list)

for path in image_paths:
    filename = os.path.basename(path)
    v, h = extract_gaze_from_filename(filename)
    if v is None or h is None:
        # Filename carries no parseable gaze annotation; nothing to classify.
        continue
    label = classify_gaze(v, h)
    if label:
        labeled_data[label].append(path)

# NOTE(review): the split is made per image within each label, not per source
# folder, so images from the same folder can land in different splits. If the
# folders correspond to subjects this leaks identities across splits -- confirm
# whether that is acceptable for the downstream model.
unreadable_paths = []  # files cv2.imread could not decode

try:
    for label, paths in labeled_data.items():
        random.shuffle(paths)
        n = len(paths)
        train_end = int(n * SPLIT_RATIOS['train'])
        val_end = train_end + int(n * SPLIT_RATIOS['val'])
        # 'test' takes the remainder, so rounding never drops an image.
        split_dict = {
            'train': paths[:train_end],
            'val': paths[train_end:val_end],
            'test': paths[val_end:]
        }

        for split, split_paths in split_dict.items():
            out_dir = os.path.join(OUTPUT_DIR, split, label)
            os.makedirs(out_dir, exist_ok=True)
            for img_path in tqdm(split_paths, desc=f"{split}/{label}"):
                img = cv2.imread(img_path)
                if img is None:
                    # imread returns None (no exception) for missing/corrupt
                    # files; cvtColor would otherwise fail on it.
                    unreadable_paths.append(img_path)
                    continue
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                results = face_mesh.process(img_rgb)
                # Images in which no face is detected are skipped.
                if results.multi_face_landmarks:
                    cropped = crop_eye_region(img, results.multi_face_landmarks[0].landmark)
                    if cropped.size == 0:
                        # Landmarks fell outside the frame; nothing to save.
                        continue
                    out_path = os.path.join(out_dir, os.path.basename(img_path))
                    cv2.imwrite(out_path, cropped)
finally:
    # Close the FaceMesh instance even if processing fails part-way.
    face_mesh.close()

if unreadable_paths:
    print(f"Skipped {len(unreadable_paths)} unreadable image(s).")

train/down_left:  36%|███▌      | 211/588 [00:32<00:57,  6.59it/s]

In [12]:
import pandas as pd
from IPython.display import display

# One row per gaze label with the number of source images grouped under it.
# The counts come from labeled_data, i.e. before face detection and cropping.
df = pd.DataFrame(
    [len(paths) for paths in labeled_data.values()],
    index=list(labeled_data),
    columns=["# of Images"],
)
df.columns.name = "Labels"
display(df)

Labels,# of Images
down_left,840
down,280
down_right,840
left,840
center,280
right,840
up_left,840
up,280
up_right,840


In [14]:
# Zip the contents of OUTPUT_DIR. base_name is relative, so the archive
# "ColumbiaGazeProcessed.zip" is created in the current working directory.
shutil.make_archive(
    base_name="ColumbiaGazeProcessed",
    format="zip",
    root_dir=OUTPUT_DIR,
)

'/home/sagemaker-user/gesture-gaze-extension/notebooks/ColumbiaGazeProcessed.zip'