In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
# Load an image for prediction (for example, from a file)
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.models import load_model
from tensorflow.keras.utils import load_img, img_to_array
import cv2

import numpy as np
import mediapipe as mp

In [2]:
# === Load models ===
# Two Keras HDF5 checkpoints, loaded once and reused by the inference cells:
# face_model scores the resized full frame, eye_model scores cropped eye patches.
face_model = load_model(filepath="../models/face_direction_model2.h5")
eye_model = load_model(filepath="../models/eyes_dir_model.h5")




In [57]:
# === Class labels ===
# Position i in each list names output unit i of the matching model.
# NOTE(review): eye_labels is in alphabetical order but face_labels is not
# (alphabetical would be front/left/right) — confirm this matches the class
# indices the face model was trained with.
face_labels = ["left", "right", "front"]
eye_labels = [
    "BottomCenter", "BottomLeft", "BottomRight",
    "MiddleLeft", "MiddleRight",
    "TopCenter", "TopLeft", "TopRight",
]

In [None]:

# === Parameters ===
face_img_size = (128, 128)
eye_img_size = (64, 64)
img_path = "../Datasets/t2.jpg"  # One image feeds both the face and the eye model

face_img = cv2.imread(img_path)
if face_img is None:
    # cv2.imread signals an unreadable path by returning None instead of raising.
    raise FileNotFoundError(f"Image not found at path: {img_path}")
eye_img = face_img.copy()  # separate buffer that later receives the overlays

# === Face direction prediction ===
# Resize, scale to [0, 1] and add a leading batch axis.
# NOTE(review): cv2.imread yields BGR channel order — confirm face_model was
# trained on BGR input.
face_img_array = (
    cv2.resize(face_img, face_img_size).astype("float32") / 255.0
)[np.newaxis, ...]

face_prediction = face_model.predict(face_img_array, verbose=0)[0]
left, right, front = face_prediction

# === Custom logic for face direction ===
# Hand-tuned rules layered on top of the raw scores; arg-max is only the last resort.
front_right_gap = front - right

# "front" leads "right" by a small margin -> still call it "right".
front_narrowly_ahead = front >= 0.25 and 0.01 <= front_right_gap <= 0.18
# "right" clearly outweighs "left".
right_outweighs_left = (right > 0.5 and left < 0.25) or right > 2 * left
# NOTE(review): the second clause can select "left" while "right" is the
# arg-max (e.g. left=0.25, right=0.41) — confirm that is intended.
left_preferred = left > right or (left > 0.2 and right < 0.5)

if front_narrowly_ahead or right_outweighs_left:
    predicted_face_dir = "right"
elif left_preferred:
    predicted_face_dir = "left"
else:
    predicted_face_dir = face_labels[np.argmax(face_prediction)]  # default to max


# === Eye detection and prediction ===
mp_face_mesh = mp.solutions.face_mesh
rgb_eye_img = cv2.cvtColor(eye_img, cv2.COLOR_BGR2RGB)

# FaceMesh is used as a context manager so the detector is closed as soon as the
# landmarks for this single image have been extracted, instead of lingering in
# the kernel for every re-run of the cell.
with mp_face_mesh.FaceMesh(static_image_mode=True) as face_mesh:
    results = face_mesh.process(rgb_eye_img)

# === Fallback setup for Haar cascades ===
# Only consulted when the face mesh reports no landmarks.
eye_detected = False
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")


# Image height/width, used to turn landmark coordinates into pixels.
h, w = eye_img.shape[:2]
# Per-eye results as (label, score, score_vector); None until an eye is classified.
left_eye_prediction = None
right_eye_prediction = None


def preprocess_eye(eye_roi):
    """Convert a BGR eye crop into a single-sample batch for the eye model.

    The crop is turned to grayscale, resized to the module-level
    ``eye_img_size``, divided by 255 as float32, and given a leading batch axis
    and a trailing channel axis.
    """
    gray_patch = cv2.resize(cv2.cvtColor(eye_roi, cv2.COLOR_BGR2GRAY), eye_img_size)
    scaled_patch = gray_patch.astype("float32") / 255.0
    return scaled_patch[np.newaxis, ..., np.newaxis]


def predict_eye(eye_array):
    """Run ``eye_model`` on one preprocessed eye batch.

    Returns a 3-tuple: the best-scoring entry of ``eye_labels``, that entry's
    score, and the full score vector of the first (only) sample.
    """
    scores = eye_model.predict(eye_array, verbose=0)[0]
    best = int(scores.argmax())
    return eye_labels[best], scores[best], scores


# Crops fed to the eye model come from this untouched copy. eye_img is drawn on
# while the loops run, and a box or caption painted for one eye must not end up
# inside the pixels classified for the next one.
clean_eye_img = eye_img.copy()

if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
        # Two landmark ids per eye — presumably the eye corners; confirm against
        # the MediaPipe face-mesh landmark map.
        left_eye_idx = [33, 133]
        right_eye_idx = [362, 263]

        def extract_eye(indices):
            """Return a padded box (x1, y1, x2, y2), clamped to the image, around the landmarks.

            Landmark coordinates are scaled by the image width ``w`` and height
            ``h``. Padding is 40% of the landmark spread horizontally and 120%
            of it vertically.
            NOTE(review): when the landmarks fall on the same pixel row the
            vertical spread is 0, the box has no height, and the caller skips
            that eye — confirm this is acceptable.
            """
            x = [int(face_landmarks.landmark[i].x * w) for i in indices]
            y = [int(face_landmarks.landmark[i].y * h) for i in indices]
            x1, x2 = min(x), max(x)
            y1, y2 = min(y), max(y)
            mx, my = int((x2 - x1) * 0.4), int((y2 - y1) * 1.2)
            return max(x1 - mx, 0), max(y1 - my, 0), min(x2 + mx, w), min(y2 + my, h)

        for label, indices in zip(["Left", "Right"], [left_eye_idx, right_eye_idx]):
            x1, y1, x2, y2 = extract_eye(indices)
            eye_roi = clean_eye_img[y1:y2, x1:x2]
            if eye_roi.size == 0:
                continue
            eye_array = preprocess_eye(eye_roi)
            pred_label, prob, pred_vector = predict_eye(eye_array)
            # Overlays go on eye_img only; the clean copy stays pristine.
            cv2.rectangle(eye_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(eye_img, f"{label} Eye: {pred_label}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
            if label == "Left":
                left_eye_prediction = (pred_label, prob, pred_vector)
            else:
                right_eye_prediction = (pred_label, prob, pred_vector)
            eye_detected = True

else:
    # === Fallback: Haar Cascade eye detection ===
    print("⚠️ MediaPipe failed, using Haar Cascade fallback.")

    gray = cv2.cvtColor(eye_img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

    for (x, y, w_, h_) in faces:
        roi_gray = gray[y:y+h_, x:x+w_]
        roi_color = eye_img[y:y+h_, x:x+w_]          # view that receives the boxes
        roi_clean = clean_eye_img[y:y+h_, x:x+w_]    # same region, never drawn on
        eyes = eye_cascade.detectMultiScale(roi_gray)

        # Keep the first two detections, then order them by x so that index 0 is
        # the eye further left in the image. The detector does not document any
        # ordering, so the raw index says nothing about which side an eye is on.
        ordered_eyes = sorted(eyes[:2], key=lambda box: box[0])

        for i, (ex, ey, ew, eh) in enumerate(ordered_eyes):
            # Drop tiny boxes and boxes wider than half the face.
            if ew < 10 or eh < 10 or ew > w_ // 2:
                continue

            eye_roi = roi_clean[ey:ey+eh, ex:ex+ew]
            if eye_roi.size == 0:
                continue

            eye_array = preprocess_eye(eye_roi)
            pred_label, prob, pred_vector = predict_eye(eye_array)

            label = "Left" if i == 0 else "Right"
            cv2.putText(eye_img, f"{label} Eye: {pred_label}", (x + ex, y + ey - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 255), 2)

            if label == "Left":
                left_eye_prediction = (pred_label, prob, pred_vector)
            else:
                right_eye_prediction = (pred_label, prob, pred_vector)
            eye_detected = True






# === Final unified logic ===
# The face model's verdict is the default; eye predictions may override it below.
eye_final_label = None
final_direction = predicted_face_dir

if left_eye_prediction and right_eye_prediction:
    left_label, left_prob, left_vector = left_eye_prediction
    right_label, right_prob, right_vector = right_eye_prediction

    # Agreeing eyes share their label; otherwise the strictly more confident
    # eye wins and a tie goes to the right eye.
    if left_label == right_label or left_prob > right_prob:
        eye_final_label = left_label
    else:
        eye_final_label = right_label

    # Per-class score an eye must exceed to force the result to "front".
    thresholds = {
        "TopCenter": 0.70,
        "BottomCenter": 0.98,  # Less influence
    }

    for class_name, limit in thresholds.items():
        column = eye_labels.index(class_name)
        if any(vector[column] > limit for vector in (left_vector, right_vector)):
            final_direction = "front"
            break

# === Display results ===
cv2.putText(
    eye_img,
    f"Final Direction: {final_direction.upper()}",
    (30, 40),
    cv2.FONT_HERSHEY_SIMPLEX,
    1,
    (0, 0, 255),
    2,
)

# Opens a native window; waitKey(0) blocks the cell until a key is pressed.
cv2.imshow("Unified Gaze Output", eye_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# === Console logs ===
print("\n=== Face Direction Model Prediction Scores ===")
for class_name, score in zip(face_labels, face_prediction):
    print(f"{class_name}: {score:.4f}")

print(f"\n→ Final Direction: {final_direction.upper()}")

# eye_final_label is only set when both eyes were classified.
if eye_final_label:
    left_name, left_score = left_eye_prediction[:2]
    right_name, right_score = right_eye_prediction[:2]
    print("\n=== Eye Model Decision ===")
    print(f"Left Eye: {left_name} ({left_score:.4f})")
    print(f"Right Eye: {right_name} ({right_score:.4f})")
    print(f"→ Synchronized Eye Label: {eye_final_label}")



=== Face Direction Model Prediction Scores ===
left: 0.2462
right: 0.4121
front: 0.3418

→ Final Direction: LEFT

=== Eye Model Decision ===
Left Eye: MiddleRight (0.4245)
Right Eye: MiddleRight (0.5853)
→ Synchronized Eye Label: MiddleRight


In [148]:

# === Parameters ===
face_img_size = (128, 128)
eye_img_size = (64, 64)
img_path = "../Datasets/t5.jpg"  # One image feeds both the face and the eye model

face_img = cv2.imread(img_path)
if face_img is None:
    # cv2.imread signals an unreadable path by returning None instead of raising.
    raise FileNotFoundError(f"Image not found at path: {img_path}")
eye_img = face_img.copy()

# === Face direction prediction ===
# Resize, scale to [0, 1] and add a leading batch axis.
# NOTE(review): cv2.imread yields BGR channel order — confirm face_model was
# trained on BGR input.
face_img_array = (
    cv2.resize(face_img, face_img_size).astype("float32") / 255.0
)[np.newaxis, ...]

face_prediction = face_model.predict(face_img_array, verbose=0)[0]
left, right, front = face_prediction

# === Custom logic for face direction ===
# Hand-tuned rules layered on top of the raw scores; arg-max is only the last resort.
# NOTE(review): the upper bound of the front/right gap is 0.2 here — confirm it
# is meant to differ from other copies of this rule in the notebook.
front_right_gap = front - right

# "front" leads "right" by a small margin -> still call it "right".
front_narrowly_ahead = front >= 0.25 and 0.01 <= front_right_gap <= 0.2
# "right" clearly outweighs "left".
right_outweighs_left = (right > 0.5 and left < 0.25) or right > 2 * left
# NOTE(review): the second clause can select "left" while "right" is the
# arg-max — confirm that is intended.
left_preferred = left > right or (left > 0.2 and right < 0.5)

if front_narrowly_ahead or right_outweighs_left:
    predicted_face_dir = "right"
elif left_preferred:
    predicted_face_dir = "left"
else:
    predicted_face_dir = face_labels[np.argmax(face_prediction)]  # default to max



# === Read input image ===
img = eye_img  # <-- Change as needed
if img is None:
    # Guard for when the line above is pointed at a different image source.
    raise ValueError("Image not found!")


# === Load trained model ===
# Aliases read by the helper functions defined further down in this cell.
model = eye_model
img_size = eye_img_size

# === Class labels ===
class_labels = eye_labels  # <-- Dynamically picked

# === Load Haar cascades (fallback if MediaPipe fails) ===
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")

# Resize if too large: shrink to fit 800x600 while keeping the aspect ratio.
# After this step `img` may be a new, smaller array, and h/w describe `img`.
max_width, max_height = 800, 600
h, w = img.shape[:2]
if w > max_width or h > max_height:
    scale = min(max_width / w, max_height / h)
    img = cv2.resize(img, (int(w * scale), int(h * scale)))
    h, w = img.shape[:2]

# Convert to RGB
rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# === Run MediaPipe face mesh ===
# FaceMesh is used as a context manager so the detector is closed as soon as the
# landmarks for this single image have been extracted, instead of a fresh
# instance being left open by every run of the cell.
mp_face_mesh = mp.solutions.face_mesh
with mp_face_mesh.FaceMesh(static_image_mode=True) as face_mesh:
    results = face_mesh.process(rgb_img)

# Flag to check if MediaPipe worked
eye_detected = False

# === Helper functions ===
def preprocess_eye(eye_roi):
    """Convert a BGR eye crop into a single-sample batch for the eye model.

    The crop is turned to grayscale, resized to the module-level ``img_size``,
    divided by 255 as float32, and given a leading batch axis and a trailing
    channel axis, i.e. (1, 64, 64, 1) for a 64x64 ``img_size``.
    """
    gray_patch = cv2.resize(cv2.cvtColor(eye_roi, cv2.COLOR_BGR2GRAY), img_size)
    scaled_patch = gray_patch.astype("float32") / 255.0
    return scaled_patch[np.newaxis, ..., np.newaxis]

def predict_eye(eye_array):
    """Run the module-level ``model`` on one preprocessed eye batch.

    Returns a 3-tuple: the best-scoring entry of ``class_labels``, that entry's
    score, and the full score vector of the first (only) sample.
    """
    scores = model.predict(eye_array, verbose=0)[0]
    best = int(scores.argmax())
    return class_labels[best], scores[best], scores

def print_probs(prediction, class_labels):
    """Print one ``label: score`` line per class and return the top (label, score).

    Lines are printed for as many entries as the shorter of the two inputs
    provides; the returned pair is taken at the arg-max of ``prediction``.
    """
    print("Probabilities:")
    for position in range(min(len(class_labels), len(prediction))):
        print(f"  {class_labels[position]}: {prediction[position]:.4f}")
    winner = np.argmax(prediction)
    return class_labels[winner], prediction[winner]

# === Try MediaPipe face mesh first ===
# Start from a clean slate: without this reset, a run that finds no pair of
# eyes would silently reuse predictions left in the kernel by an earlier cell
# (or fail with NameError on a fresh kernel).
left_eye_prediction = None
right_eye_prediction = None

# Crops fed to the eye model come from this untouched copy. `img` is drawn on
# while the loop runs, and a box or caption painted for one eye must not end up
# inside the pixels classified for the next one.
img_clean = img.copy()

if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
        # Two landmark ids per eye — presumably the eye corners; confirm against
        # the MediaPipe face-mesh landmark map.
        left_eye_indices = [33, 133]
        right_eye_indices = [362, 263]

        def extract_eye_region(indices):
            """Return a padded box (x1, y1, x2, y2), clamped to `img`, around the landmarks.

            Landmark coordinates are scaled by the width ``w`` and height ``h``
            of ``img``. Padding is 40% of the landmark spread horizontally and
            120% of it vertically.
            NOTE(review): when the landmarks fall on the same pixel row the
            vertical spread is 0, the box has no height, and the caller skips
            that eye — confirm this is acceptable.
            """
            x_coords = [int(face_landmarks.landmark[i].x * w) for i in indices]
            y_coords = [int(face_landmarks.landmark[i].y * h) for i in indices]
            x_min, x_max = min(x_coords), max(x_coords)
            y_min, y_max = min(y_coords), max(y_coords)
            margin_x = int((x_max - x_min) * 0.4)
            margin_y = int((y_max - y_min) * 1.2)
            x1 = max(x_min - margin_x, 0)
            y1 = max(y_min - margin_y, 0)
            x2 = min(x_max + margin_x, w)
            y2 = min(y_max + margin_y, h)
            return x1, y1, x2, y2

        left_pred = right_pred = None

        for eye_label, indices in zip(["Left", "Right"], [left_eye_indices, right_eye_indices]):
            x1, y1, x2, y2 = extract_eye_region(indices)
            eye_roi = img_clean[y1:y2, x1:x2]
            if eye_roi.size == 0:
                continue

            eye_array = preprocess_eye(eye_roi)
            predicted_label, predicted_prob, prediction = predict_eye(eye_array)

            if eye_label == "Left":
                left_pred = (predicted_label, predicted_prob, prediction)
            else:
                right_pred = (predicted_label, predicted_prob, prediction)

            # Overlays go on `img` only, the image that is displayed. The box is
            # expressed in `img` pixels, which are not valid for `eye_img`
            # whenever `img` is a down-scaled copy of it.
            label_text = f"{eye_label}: {predicted_label} ({predicted_prob:.2f})"
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(img, label_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            print(f"[{eye_label} Eye] {label_text}")
            print_probs(prediction, class_labels)
            eye_detected = True

        # Publish the pair only when both eyes of this face were classified.
        if left_pred and right_pred:
            left_eye_prediction = left_pred
            right_eye_prediction = right_pred

            

           

# === Fallback to Haar cascades ===
if not eye_detected:
    print("⚠️ MediaPipe failed, using Haar cascades...")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    # Crops fed to the eye model come from this untouched copy so that a box or
    # caption drawn for one eye cannot end up inside the next eye's crop.
    img_unmarked = img.copy()

    # Face-box names are distinct from the image-wide w/h so those stay intact.
    for (fx, fy, fw, fh) in faces:
        roi_gray = gray[fy:fy+fh, fx:fx+fw]
        roi_color = img[fy:fy+fh, fx:fx+fw]                # view that receives the overlays
        roi_unmarked = img_unmarked[fy:fy+fh, fx:fx+fw]    # same region, never drawn on
        eyes = eye_cascade.detectMultiScale(roi_gray)

        predictions = []

        # Keep the first two detections, then order them by x so that index 0 is
        # the eye further left in the image. The detector does not document any
        # ordering, so the raw index says nothing about which side an eye is on.
        ordered_eyes = sorted(eyes[:2], key=lambda box: box[0])

        for i, (ex, ey, ew, eh) in enumerate(ordered_eyes):
            # Drop tiny boxes and boxes that start in the lower half of the face.
            if ew < 10 or eh < 10 or ey > fh // 2:
                continue

            eye_roi = roi_unmarked[ey:ey+eh, ex:ex+ew]
            if eye_roi.size == 0:
                continue

            eye_array = preprocess_eye(eye_roi)
            predicted_label, predicted_prob, prediction = predict_eye(eye_array)
            predictions.append((predicted_label, predicted_prob, prediction))

            # Overlays go on `img` only (through the roi_color view), the image
            # that is displayed. Face-box coordinates are in `img` pixels, which
            # are not valid for `eye_img` whenever `img` is a down-scaled copy.
            label_text = f"Eye{i+1}: {predicted_label} ({predicted_prob:.2f})"
            cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (255, 0, 0), 2)
            cv2.putText(roi_color, label_text, (ex, ey - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (255, 0, 0), 1)

            print(f"[Haar Eye {i+1}] {label_text}")
            print_probs(prediction, class_labels)

        # Publish the pair only when two eyes survived the filters.
        if len(predictions) == 2:
            left_eye_prediction = predictions[0]
            right_eye_prediction = predictions[1]

            # Label of the strictly more confident eye; a tie goes to the second.
            if predictions[0][1] > predictions[1][1]:
                synchronized_label = predictions[0][0]
            else:
                synchronized_label = predictions[1][0]

            



# === Final unified logic ===
# The face model's verdict is the default; eye predictions may override it below.
eye_final_label = None
final_direction = predicted_face_dir

if left_eye_prediction and right_eye_prediction:
    left_label, left_prob, left_vector = left_eye_prediction
    right_label, right_prob, right_vector = right_eye_prediction

    # Agreeing eyes share their label; otherwise the strictly more confident
    # eye wins and a tie goes to the right eye.
    if left_label == right_label or left_prob > right_prob:
        eye_final_label = left_label
    else:
        eye_final_label = right_label

    # Per-class score an eye must exceed to force the result to "front".
    thresholds = {
        "TopCenter": 0.70,
        "BottomCenter": 0.98,  # Less influence
    }

    for class_name, limit in thresholds.items():
        column = eye_labels.index(class_name)
        if any(vector[column] > limit for vector in (left_vector, right_vector)):
            final_direction = "front"
            break



# === Eye-based override of a "right" verdict, and synchronized eye label ===
# Everything below indexes into the per-eye tuples, so it only runs when both
# eyes were classified; otherwise the face verdict stands and there is no
# synchronized label.
synchronized_label = None

if left_eye_prediction and right_eye_prediction:
    left_label, left_prob = left_eye_prediction[:2]
    right_label, right_prob = right_eye_prediction[:2]

    if predicted_face_dir == "right":
        # A "BottomCenter" eye that is also the more confident of the two turns
        # the verdict into "front". The right eye is only consulted when the
        # left eye's label is not "BottomCenter".
        if left_label == "BottomCenter":
            if left_prob > right_prob:
                final_direction = "front"
        elif right_label == "BottomCenter":
            if right_prob > left_prob:
                final_direction = "front"

    # Label of the strictly more confident eye; a tie goes to the right eye.
    # Read from the published per-eye predictions so the result is the same
    # whichever detector produced them.
    synchronized_label = left_label if left_prob > right_prob else right_label

# cv2.putText(img, f"Synchronized: {synchronized_label}", (20, 30),
#                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
#                         print(f"=> Final Syn chronized Label: {synchronized_label}\n")

# === Display results ===
cv2.putText(
    img,
    f"Final Direction: {final_direction.upper()}",
    (30, 40),
    cv2.FONT_HERSHEY_SIMPLEX,
    1,
    (0, 0, 255),
    2,
)

# Opens a native window; waitKey(0) blocks the cell until a key is pressed.
cv2.imshow("Unified Gaze Output", img)

cv2.waitKey(0)
cv2.destroyAllWindows()

# === Console logs ===
print("\n=== Face Direction Model Prediction Scores ===")
for class_name, score in zip(face_labels, face_prediction):
    print(f"{class_name}: {score:.4f}")

print(f"\n→ Final Direction: {final_direction.upper()}")

# eye_final_label is only set when both eyes were classified.
if eye_final_label:
    left_name, left_score = left_eye_prediction[:2]
    right_name, right_score = right_eye_prediction[:2]
    print("\n=== Eye Model Decision ===")
    print(f"Left Eye: {left_name} ({left_score:.4f})")
    print(f"Right Eye: {right_name} ({right_score:.4f})")
    print(f"→ Synchronized Eye Label: {eye_final_label}")



[Left Eye] Left: BottomCenter (0.32)
Probabilities:
  BottomCenter: 0.3165
  BottomLeft: 0.2133
  BottomRight: 0.2345
  MiddleLeft: 0.0846
  MiddleRight: 0.0952
  TopCenter: 0.0233
  TopLeft: 0.0249
  TopRight: 0.0076
[Right Eye] Right: TopLeft (0.69)
Probabilities:
  BottomCenter: 0.0008
  BottomLeft: 0.0009
  BottomRight: 0.0014
  MiddleLeft: 0.0775
  MiddleRight: 0.0706
  TopCenter: 0.0295
  TopLeft: 0.6907
  TopRight: 0.1286

=== Face Direction Model Prediction Scores ===
left: 0.2695
right: 0.3532
front: 0.3773

→ Final Direction: RIGHT

=== Eye Model Decision ===
Left Eye: BottomCenter (0.3165)
Right Eye: TopLeft (0.6907)
→ Synchronized Eye Label: TopLeft
