In [1]:
import sys
from pathlib import Path

# Directory two levels above the current working directory; it is put on the
# import path so project packages (presumably `data_processing`, imported in
# the next cell) can be resolved.
parent_dir = Path.cwd().parent.parent
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

In [2]:
import numpy as np
import cv2
import dlib
from data_processing.image_processing import ImageProcessor
# Shared dlib face detector and landmark predictor used by the cells below.
global_detector = dlib.get_frontal_face_detector()
global_predictor = dlib.shape_predictor('../shape_predictor_68_face_landmarks.dat')

# OpenCV DNN super-resolution model. The scale handed to setModel() is expected
# to match the scale of the loaded weights; the file name indicates x4 EDSR
# weights, so the scale is 4 (it was 2, which disagreed with the file).
global_sr_model = cv2.dnn_superres.DnnSuperResImpl_create()
global_sr_model.readModel("../EDSR_x4.pb")
global_sr_model.setModel("edsr", 4)

# NOTE: this rebinds the name `ImageProcessor` from the imported class to an
# instance of it. Later code calls `ImageProcessor.extract_eye_region(...)` on
# that instance, so the binding is kept as-is; re-running the whole cell is
# safe because the import above restores the class first.
ImageProcessor = ImageProcessor(global_detector, global_predictor, global_sr_model)

In [3]:
def get_combined_eyes(frame, global_detector, global_predictor, target_size=(200, 100)):
    """
    Detect a face in ``frame`` and return its combined eye region, resized and scaled.

    The frame is converted to grayscale for detection, landmarks are predicted for
    each detected face, and the crop is produced by the notebook-level
    ``ImageProcessor.extract_eye_region`` from the original (colour) frame. The first
    face that yields a non-empty array is resized to ``target_size`` and returned.

    Args:
        frame: The input image frame (passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``, so a BGR image is expected). ``None`` is tolerated.
        global_detector: Face detector, called as ``detector(gray)``.
        global_predictor: Landmark predictor, called as ``predictor(gray, face)``.
        target_size: ``dsize`` passed to ``cv2.resize`` (i.e. ``(width, height)``).
    Returns:
        The resized eye region as a ``float32`` array divided by 255, or ``None``
        if ``frame`` is ``None`` or no face produced a usable region.
    """
    # A failed cv2.imread / cap.read hands us None; report "nothing detected"
    # instead of failing inside cv2.cvtColor.
    if frame is None:
        return None

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = global_detector(gray)

    # Landmark indices handed to the extractor (loop-invariant, so built once).
    forehead_points = [20, 21, 22, 23, 0, 16]
    left_eye_points = [36, 37, 38, 39, 40, 41]
    right_eye_points = [42, 43, 44, 45, 46, 47]
    nose_bridge_points = [27, 28, 29]  # Adjust if necessary for your landmarks

    for face in faces:
        landmarks = global_predictor(gray, face)

        # Only the first returned value (the image) is used.
        combined_eye_region, _ = ImageProcessor.extract_eye_region(
            frame, landmarks, left_eye_points, right_eye_points, nose_bridge_points, forehead_points)

        # Skip faces without a usable crop; cv2.resize rejects empty images.
        if not isinstance(combined_eye_region, np.ndarray) or combined_eye_region.size == 0:
            continue

        # Resize to the final target size, then scale values by 1/255
        # (gives [0, 1] assuming the crop is 8-bit).
        resized = cv2.resize(combined_eye_region, target_size, interpolation=cv2.INTER_AREA)
        return resized.astype(np.float32) / 255.0

    return None


In [4]:

def normalize_head_pose(head_pose_data, rotation_scale=180, translation_max_displacement=None):
    """
    Normalizes the head pose data.

    Args:
        head_pose_data: Sequence whose first three entries are the rotation vector
            and whose remaining entries are the translation vector.
        rotation_scale: Divisor applied to every rotation component
            (180 for degrees, np.pi for radians).
        translation_max_displacement: Three values (max_x, max_y, max_z), as a tuple,
            list or NumPy array, used as per-axis divisors for the translation.
            If None (or empty), the translation is standardized instead: its own
            mean is subtracted and the result divided by its own standard deviation.

    Returns:
        A flat list: normalized rotation followed by normalized translation.
        When standardizing a translation whose components are all equal
        (standard deviation 0), the translation part is all zeros rather than NaN.
    """
    # Normalize rotation vector
    normalized_rotation = np.array(head_pose_data[:3]) / rotation_scale

    translation_vector = np.array(head_pose_data[3:])

    # Explicit None/empty test: plain truthiness raises for NumPy arrays.
    use_max_displacement = (
        translation_max_displacement is not None
        and np.size(translation_max_displacement) > 0
    )

    if use_max_displacement:
        max_x, max_y, max_z = translation_max_displacement
        normalized_translation = translation_vector / np.array([max_x, max_y, max_z])
    else:
        # Standard deviation normalization
        centered = translation_vector - np.mean(translation_vector)
        std_dev = np.std(translation_vector)
        if std_dev == 0:
            # All components identical: avoid 0/0 and report "no deviation".
            normalized_translation = np.zeros_like(centered, dtype=float)
        else:
            normalized_translation = centered / std_dev

    return np.concatenate([normalized_rotation, normalized_translation]).tolist()

In [15]:
# Load the saved Keras gaze model from disk and print its layer summary.
from keras.models import load_model

gaze_model_path = '../models/eye_gaze_v22v1.h5'
model = load_model(gaze_model_path)
model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 94, 194, 32)       4736      
                                                                 
 conv2d_9 (Conv2D)           (None, 88, 188, 64)       100416    
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 44, 94, 64)       0         
 2D)                                                             
                                                                 
 batch_normalization_2 (Batc  (None, 44, 94, 64)       256       
 hNormalization)                                                 
                                                                 
 dropout_6 (Dropout)         (None, 44, 94, 64)        0         
                                                                 
 conv2d_10 (Conv2D)          (None, 40, 90, 128)      

In [12]:
import cv2
import numpy as np
from collections import deque
import pickle

# Number of most recent gaze points kept in each history (change as needed).
n_points = 5
# Two bounded histories: one for the raw predictions, one for the adjusted ones.
# A deque with maxlen discards its oldest entry once it is full.
gaze_points_queue, adjusted_gaze_points_queue = deque(maxlen=n_points), deque(maxlen=n_points)

# Screen dimensions in pixels (width, height).
screenWidth, screenHeight = 1707, 960

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load an adjustment model that comes from a trusted source.
with open('../pickel_files/adjustment_model.pkl', mode='rb') as f:
    adjustment_model = pickle.load(f)

def moving_average(new_point, queue):
    """Append ``new_point`` to ``queue`` and return the per-coordinate mean.

    Args:
        new_point: Iterable of numbers, e.g. an ``(x, y)`` pair.
        queue: Container of earlier points; it is mutated by the append. When it
            is a ``deque`` with ``maxlen``, the oldest point is dropped once full,
            so the mean covers only the most recent points.

    Returns:
        A list with one mean per coordinate, computed over everything currently
        in ``queue`` (including ``new_point``).
    """
    queue.append(new_point)
    count = len(queue)

    averaged = []
    # zip(*queue) regroups the stored points coordinate by coordinate.
    for coordinate_values in zip(*queue):
        averaged.append(sum(coordinate_values) / count)
    return averaged

# Live gaze tracking: read webcam frames, predict the gaze point, and draw the
# smoothed raw (green) and adjusted (blue) points on a full-screen canvas.
# Press "q" in an OpenCV window to stop.
cap = cv2.VideoCapture(0)
try:
    cv2.namedWindow('Gaze Tracking on Canvas', cv2.WINDOW_NORMAL)
    cv2.setWindowProperty('Gaze Tracking on Canvas', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    canvas = np.zeros((screenHeight, screenWidth, 3), dtype=np.uint8)

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended).
            break

        combined_eyes = get_combined_eyes(frame, global_detector, global_predictor)

        if combined_eyes is not None:
            # Add the batch axis expected by model.predict.
            combined_eyes = np.expand_dims(combined_eyes, axis=0)
            predicted_gaze = model.predict(combined_eyes)[0]

            # Predictions are multiplied by the screen size to get pixel coordinates.
            gaze_x_scaled = int(predicted_gaze[0] * screenWidth)
            gaze_y_scaled = int(predicted_gaze[1] * screenHeight)

            adjusted_pred = adjustment_model.predict(predicted_gaze.reshape(1, -1))[0]
            adjusted_x, adjusted_y = adjusted_pred[0] * screenWidth, adjusted_pred[1] * screenHeight

            # Apply moving average filter to both raw and adjusted gaze points
            gaze_x_smooth, gaze_y_smooth = moving_average((gaze_x_scaled, gaze_y_scaled), gaze_points_queue)
            adjusted_x_smooth, adjusted_y_smooth = moving_average((adjusted_x, adjusted_y), adjusted_gaze_points_queue)

            # Clamp to screen size after smoothing
            gaze_x_smooth = max(0, min(int(gaze_x_smooth), screenWidth - 1))
            gaze_y_smooth = max(0, min(int(gaze_y_smooth), screenHeight - 1))
            adjusted_x_smooth = max(0, min(int(adjusted_x_smooth), screenWidth - 1))
            adjusted_y_smooth = max(0, min(int(adjusted_y_smooth), screenHeight - 1))

            canvas.fill(0)  # Clear canvas for fresh drawing
            # Draw smoothed points (colours are BGR)
            cv2.circle(canvas, (gaze_x_smooth, gaze_y_smooth), 10, (0, 255, 0), -1)
            cv2.circle(canvas, (adjusted_x_smooth, adjusted_y_smooth), 10, (255, 0, 0), -1)

            cv2.imshow('Gaze Tracking on Canvas', canvas)

        cv2.imshow('Original Webcam Feed', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if prediction fails or
    # the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()




In [None]:
import cv2
import numpy as np
from collections import deque
import pickle

# History of gaze points fed to the heatmap; sized larger than a smoothing
# window so more points contribute. The deque drops its oldest entry when full.
n_points = 100
gaze_points_queue = deque(maxlen=n_points)

# Screen dimensions in pixels (width, height).
screenWidth, screenHeight = 1707, 960

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load an adjustment model that comes from a trusted source.
with open('../pickel_files/adjustment_model.pkl', mode='rb') as f:
    adjustment_model = pickle.load(f)

# Empty single-channel accumulator, indexed as heatmap[row (y), column (x)].
heatmap = np.zeros(shape=(screenHeight, screenWidth), dtype=np.float32)

def update_heatmap(points, heatmap, decay=0.95, intensity=5):
    """Fade ``heatmap`` and add ``intensity`` at every in-bounds point.

    The array is modified in place and also returned.

    Args:
        points: Iterable of ``(x, y)`` positions; floats are accepted and are
            truncated to integer pixel indices.
        heatmap: 2-D (or higher) array indexed as ``heatmap[y, x]``. Its own shape
            defines the valid range, so it need not match any global screen size.
        decay: Factor the whole heatmap is multiplied by before adding points.
        intensity: Amount added at each point.

    Returns:
        The same ``heatmap`` object, clipped to the range [0, 255].
    """
    # Apply decay to fade old points
    heatmap *= decay

    height, width = heatmap.shape[:2]

    # Add new points with specified intensity
    for x, y in points:
        # Bounds are checked on the raw value so that e.g. -0.5 is rejected
        # rather than truncated to 0; NaN fails both comparisons and is skipped.
        if 0 <= x < width and 0 <= y < height:
            # NumPy refuses float indices, so convert explicitly.
            heatmap[int(y), int(x)] += intensity

    # Clamp values to a maximum so the later uint8 conversion cannot wrap around
    np.clip(heatmap, 0, 255, out=heatmap)

    return heatmap

# Live gaze heatmap: read webcam frames, predict the adjusted gaze point, and
# blend a decaying heatmap of recent points over the camera image.
# Press "q" in an OpenCV window to stop.
cap = cv2.VideoCapture(0)
try:
    cv2.namedWindow('Gaze Tracking with Heatmap', cv2.WINDOW_NORMAL)
    cv2.setWindowProperty('Gaze Tracking with Heatmap', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended).
            break

        # get_combined_eyes, global_detector, global_predictor, and model come from earlier cells
        combined_eyes = get_combined_eyes(frame, global_detector, global_predictor)

        if combined_eyes is not None:
            # Add the batch axis expected by model.predict.
            combined_eyes = np.expand_dims(combined_eyes, axis=0)
            predicted_gaze = model.predict(combined_eyes)[0]

            gaze_x_scaled = int(predicted_gaze[0] * screenWidth)
            gaze_y_scaled = int(predicted_gaze[1] * screenHeight)

            adjusted_pred = adjustment_model.predict(predicted_gaze.reshape(1, -1))[0]
            adjusted_x, adjusted_y = adjusted_pred[0] * screenWidth, adjusted_pred[1] * screenHeight

            # Update gaze points queue. Store integer pixel coordinates: the
            # points are used as array indices, and NumPy rejects float indices.
            gaze_points_queue.append((int(adjusted_x), int(adjusted_y)))

            # Update heatmap
            heatmap = update_heatmap(gaze_points_queue, heatmap)

            # Apply colormap to create a visual heatmap
            heatmap_vis = cv2.applyColorMap(heatmap.astype(np.uint8), cv2.COLORMAP_JET)

            # addWeighted needs both images to have the same size, and the camera
            # frame is generally not screen-sized, so scale it to the heatmap.
            frame_resized = cv2.resize(frame, (heatmap_vis.shape[1], heatmap_vis.shape[0]))

            # Overlay heatmap on the (resized) camera frame
            canvas_with_heatmap = cv2.addWeighted(frame_resized, 0.5, heatmap_vis, 0.5, 0)

            cv2.imshow('Gaze Tracking with Heatmap', canvas_with_heatmap)

        else:
            cv2.imshow('Original Webcam Feed', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if prediction fails or
    # the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()

In [9]:
import pandas as pd
import cv2
import numpy as np
import pickle
from keras.models import load_model
# Single-sample check: predict the gaze point for the last row of the dataset
# and draw the actual, predicted and adjusted points on a full-screen canvas.

# Load your main gaze detection model.
# NOTE: this rebinds the notebook-global `model`, which the live-tracking cells
# also use; whichever load ran last is the model they will predict with.
model = load_model('../eye_gaze_v23_2_10.h5')

# Load the adjustment model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load an adjustment model that comes from a trusted source.
with open('../pickel_files/adjustment_model.pkl', 'rb') as f:
    adjustment_model = pickle.load(f)

# Read a row from your dataset (the last one)
row = pd.read_csv('../data/Will/eye_gaze_data.csv').iloc[-1]

# Columns are read by position with .iloc: plain row[0] on a label-indexed
# Series is deprecated in pandas.
image_path = f'../{row.iloc[0]}'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f'Could not read image: {image_path}')
cv2.imshow('Original Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Preprocess the image
eye_region = get_combined_eyes(image, global_detector, global_predictor)
if eye_region is None:
    raise ValueError(f'No eye region detected in image: {image_path}')
# Add the batch axis expected by model.predict.
eye_batch = np.expand_dims(eye_region, axis=0)

# Predict gaze point with the main model
pred = model.predict(eye_batch)[0]

#only first 2 values are needed
pred = pred[:2]

# Adjust the prediction using the adjustment model
# Ensure the predicted gaze point is in the correct shape for the adjustment model
pred = pred.reshape(1, -1)  # Reshape if necessary
adjusted_pred = adjustment_model.predict(pred)[0]  # Adjust

# Screen dimensions
screenWidth, screenHeight = 1707, 960

# Canvas for visualization
canvas = np.zeros((screenHeight, screenWidth, 3), dtype=np.uint8)
cv2.namedWindow('Gaze Tracking on Canvas', cv2.WINDOW_NORMAL)
cv2.setWindowProperty('Gaze Tracking on Canvas', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

# Actual gaze coordinates from the dataset (second and third columns)
real_x, real_y = int(row.iloc[1]), int(row.iloc[2])
print('real', real_x, real_y)

# Original and adjusted predicted gaze coordinates, scaled to screen size
pred_x, pred_y = pred[0][0], pred[0][1]
pred_x, pred_y = pred_x * screenWidth, pred_y * screenHeight
print('orig_pred', pred_x, pred_y)
adjusted_x, adjusted_y = adjusted_pred[0] * screenWidth, adjusted_pred[1] * screenHeight

# Ensure the coordinates are within screen bounds. The last valid pixel index is
# size - 1, matching the clamping used in the live-tracking cell.
pred_x = min(max(int(pred_x), 0), screenWidth - 1)
pred_y = min(max(int(pred_y), 0), screenHeight - 1)

adjusted_x = min(max(int(adjusted_x), 0), screenWidth - 1)
adjusted_y = min(max(int(adjusted_y), 0), screenHeight - 1)
print('adjusted_pred', adjusted_x, adjusted_y)

# Visualization
#Draw the points in different colors (OpenCV colours are BGR)
# Red: Actual gaze point
# Green: Original predicted gaze point
# Blue: Adjusted predicted gaze point
cv2.circle(canvas, (real_x, real_y), 10, (0, 0, 255), -1)  # Red circle for actual gaze point
cv2.putText(canvas, 'Actual', (real_x + 20, real_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

cv2.circle(canvas, (pred_x, pred_y), 10, (0, 255, 0), -1)  # Green circle for original predicted gaze point
cv2.putText(canvas, 'Original_pred', (pred_x + 20, pred_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

cv2.circle(canvas, (adjusted_x, adjusted_y), 10, (255, 0, 0), -1)  # Blue circle for adjusted predicted gaze point
cv2.putText(canvas, 'Adjusted', (adjusted_x + 20, adjusted_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

cv2.imshow('Gaze Tracking on Canvas', canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()

real 298 938
orig_pred 224.229297503829 725.5158233642578
adjusted_pred 230 662
