In [2]:
import pyautogui
import cv2
import numpy as np
from pynput.mouse import Controller
import screeninfo

# Eye Gaze Mouse Movement Open Source

## Example of ScreenReading

In [3]:

monitors = screeninfo.get_monitors()

for monitor in monitors:
    print(f"Monitor: {monitor.name}")
    print(f"Width: {monitor.width}")
    print(f"Height: {monitor.height}")
    print(f"Position: {monitor.x}, {monitor.y}")
    print("----------")

Monitor: HDMI-0
Width: 3440
Height: 1440
Position: 0, 0
----------


## Code to list active webcam devices and preview webcam feed

In [4]:
def list_webcams():
    """List all active webcam devices connected to the system
    """
    available_cameras = []
    for i in range(10):
        cap = cv2.VideoCapture(i)
        if cap.isOpened():
            available_cameras.append(i)
        cap.release()
    return available_cameras

In [5]:
def preview_webcam(device_index=0):
    """Preview the webcam feed from a specific device.

    Parameters
    ----------
    device_index : int, optional
        This is the index of the device, by default 0
    """
    cap = cv2.VideoCapture(device_index)

    if not cap.isOpened():
        print(f"Unable to open webcam at index {device_index}")
        return

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break
    
        cv2.imshow(f"Webcam Preview - Device {device_index}", frame)

        # Press 'q' to exit the preview
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [None]:
# List active webcam devices
print("Active webcams")
webcams = list_webcams()
if webcams:
    print(webcams)
    # Preview the first available webcam
    preview_webcam(webcams[0])
else:
    print("No active webcam devices found.")

Active webcams
[0]


[ WARN:0@5.285] global cap_v4l.cpp:913 open VIDEOIO(V4L2:/dev/video1): can't open camera by index
[ERROR:0@5.285] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:0@5.285] global cap_v4l.cpp:913 open VIDEOIO(V4L2:/dev/video2): can't open camera by index
[ERROR:0@5.286] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:0@5.286] global cap_v4l.cpp:913 open VIDEOIO(V4L2:/dev/video3): can't open camera by index
[ERROR:0@5.286] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:0@5.286] global cap_v4l.cpp:913 open VIDEOIO(V4L2:/dev/video4): can't open camera by index
[ERROR:0@5.286] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range
[ WARN:0@5.286] global cap_v4l.cpp:913 open VIDEOIO(V4L2:/dev/video5): can't open camera by index
[ERROR:0@5.286] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup C

KeyboardInterrupt: 

: 

## Example code to track eye gaze and move the mouse

In [2]:
import cv2
import numpy as np
import mediapipe as mp
from pynput.mouse import Controller
import time
import sys
import screeninfo

2025-05-14 11:58:11.475205: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-14 11:58:11.484149: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747238291.493816  423018 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747238291.496698  423018 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747238291.504693  423018 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [3]:
# ------------------
# 1. Setup
# ------------------

mp_face = mp.solutions.face_mesh
face_mesh = mp_face.FaceMesh(static_image_mode=False, refine_landmarks=True, min_detection_confidence=0.5, min_tracking_confidence=0.5)
mouse = Controller()

# Acquiring the actual screen display size
monitor = screeninfo.get_monitors()[0]
SCREEN_W, SCREEN_H = monitor.width, monitor.height

# Eye landmarks indices for iris center (MediaPipe FaceMesh)
# Left iris center is average of landmarks 473, 474, 475, 476
LEFT_IRIS_IDX = [473, 474, 475, 476]
RIGHT_IRIS_IDX = [468, 469, 470, 471]

CAL_POINTS_NORM = [
    (0.1, 0.1), # top-left
    (0.9, 0.1), # top-right
    (0.1, 0.9), # bottom-left
    (0.9, 0.9), # bottom-right
    (0.5, 0.5)  # center
]

def get_iris_center(landmarks, idx_list, img_w, img_h):
    xs = [landmarks[i].x * img_w for i in idx_list]
    yx = [landmarks[i].y * img_h for i in idx_list]
    return np.mean(xs), np.mean(ys)

# --------------
# 2. Calibration
# --------------
def calibrate(cap):
    cal_src = [] # [ [iris_x, iris_y], ... ]
    cal_dst = [] # [ [screen_x, screen_y], ... ]
    for(nx, ny) in CAL_POINTS_NORM:
        # Draw fulll-screen point
        while True:
            ret, frame = cap.read()
            if not ret:
                sys.exit("Webcam not available")
            h, w, _ = frame.shape
            # draw calibration point
            px, py = int(nx*w), int(ny*h)
            disp = frame.copy()
            cv2.circle(disp, (px, py), 20, (0, 255, 0), -1)
            cv2.putText(disp, "Press Space to record here", (30, 30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
            cv2.imshow("Calibration", disp)
            key = cv2.waitKey(1)
            if key == 32: # space bar
                # capture a frame, detect face
                rgb = cv2.cvtColor(frame, cv2.COLOR_BAYER_BG2BGR)
                res = face_mesh.process(rgb)
                if not res.multi_face_landmarks:
                    continue # try again until face is found
                lm = res.multi_face_landmarks[0].landmark
                # average both eyes
                lx, ly = get_iris_center(lm, LEFT_IRIS_IDX, w, h)
                rx, ry = get_iris_center(lm, RIGHT_IRIS_IDX, w, h)
                ix, iy = (lx+rx)/2, (ly+ry)/2
                cal_src.append([ix, iy])
                cal_dst.append([nx*SCREEN_W, ny*SCREEN_H])
                break
            if key == 27:  # Escape quits
                sys.exit("Calibration aborted")
    cv2.destroyWindow("Calibration")
    return np.array(cal_src), np.array(cal_dst)


# ------------------
# 3. Compute mapping
# ------------------

def solve_mapping(src, dst):
    # Want a mapping: [ix, iy, 1] @ M = [sx, sy]
    A = np.hstack([src, np.ones((src.shape[0], 1))]) # shape: N x 3
    # solve least squares for M: shape 3x2
    M, _, _, _ = np.linalg.lstsq(A, dst, rcond=None) 
    return M # [ix, iy, 1] @ M = [screen_x, screen_y]

# ------------------------------------------
# 4. Drive real-time mouse-movement via gaze
# ------------------------------------------
def drive_mouse_via_gaze(cap, M):
    last_x, last_y = SCREEN_W/2, SCREEN_H/2
    alpha = 0.3 # smoothing factor

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        rgb = cv2.cvtColor(frame, cv2.COLOR_BAYER_BG2BGR)
        res = face_mesh.process(rgb)
        if res.multi_face_landmarks:
            lm = res.multi_face_landmarks[0].landmark
            lx, ly = get_iris_center(lm, LEFT_IRIS_IDX, w, h)
            rx, ry = get_iris_center(lm, RIGHT_IRIS_IDX, w, h)
            ix, iy = (lx+rx)/2, (ly+ry)/2
            # map to screen (affine)
            src_v = np.array([ix, iy, 1.0])
            sx, sy = src_v @ M
            # smooth
            cx = last_x +alpha*(sx - last_x)
            cy = last_y + alpha*(sy - last_y)
            mouse_position = (cx, cy)
            last_x, last_y = cx, cy
        
        # OPTIONAL: display camera feed
        cv2.imshow("Mouse->Gaze", frame)
        key = cv2.waitKey(1)
        if key == 27: # Esc to quit
            break


# -------
# 4. Main
# -------

def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        sys.exit("Cannot open camera")
    print("Starting calibration...")
    src, dst = calibrate(cap)
    print("Calibrating mapping...")
    M = solve_mapping(src, dst)
    print("Starting gaze-controlled mouse. Press Esc to exit.")
    drive_mouse_via_gaze(cap, M)
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1747238294.018778  423018 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1747238294.065340  423158 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 550.144.03), renderer: NVIDIA GeForce RTX 2080 Ti/PCIe/SSE2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1747238294.071232  423156 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747238294.081085  423144 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [None]:
main()

Starting calibration...


error: OpenCV(4.11.0) /io/opencv/modules/imgproc/src/demosaicing.cpp:1778: error: (-215:Assertion failed) scn == 1 && (dcn == 3 || dcn == 4) in function 'demosaicing'


: 