## Mediapipe Implementation

MediaPipe is an open-source tool developed by Google that offers various solutions for image and video analysis. Its key features include:

- Highly accurate facial landmark detection.

- Real-time body pose recognition.

- Highly accurate motion detection.

- A large community of developers who contribute constantly.

- Official Google support, ensuring continuous updates and improvements.

In [None]:
# Install MediaPipe
# If you have problems installing mediapipe, use "!pip install mediapipe --user" instead.
# !pip install mediapipe --user

Collecting mediapipe
  Using cached mediapipe-0.10.21-cp312-cp312-win_amd64.whl.metadata (10 kB)
Collecting absl-py (from mediapipe)
  Using cached absl_py-2.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting jax (from mediapipe)
  Using cached jax-0.6.0-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Using cached jaxlib-0.6.0-cp312-cp312-win_amd64.whl.metadata (1.2 kB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached opencv_contrib_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Using cached sounddevice-0.5.1-py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting ml_dtypes>=0.5.0 (from jax->mediapipe)
  Using cached ml_dtypes-0.5.1-cp312-cp312-win_amd64.whl.metadata (22 kB)
Using cached mediapipe-0.10.21-cp312-cp312-win_amd64.whl (51.0 MB)
Using cached sounddevice-0.5.1-py3-none-win_amd64.whl (363 kB)
Using cached absl_py-2.2.2-py3-none-any.whl (135 kB)
Using cached jax-0.6.0-py3-none-any.whl

In [9]:
import cv2
import mediapipe as mp

In [21]:
# Real-time pose estimation: read webcam frames, run MediaPipe Pose on each
# one and display the frame with the detected landmarks drawn on top.

# Initialize Mediapipe Pose
mp_pose = mp.solutions.pose

# Initialize landmark drawing
mp_drawing = mp.solutions.drawing_utils

# Capture real-time video from the camera
cap = cv2.VideoCapture(0)

try:
    # Using Pose as a context manager closes it when the block exits, so its
    # resources are freed even if the loop is interrupted or raises.
    with mp_pose.Pose(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
        static_image_mode=False,
    ) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame could be read (camera unplugged / stream ended)
                break

            # Convert frame to RGB (required by Mediapipe)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process image to get landmarks
            results = pose.process(frame_rgb)

            # Draw landmarks on the original BGR frame if a pose was detected
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS
                )

            # Show the frame with landmarks
            cv2.imshow('Pose Estimation', frame)

            # Press 'q' to exit
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Always release the camera and close the window, otherwise the webcam
    # stays locked for the next cell after an error or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


## Tracking with just 2 points

In [17]:
# Track two eye reference points (face-mesh landmarks 33 and 263) on the
# webcam stream and mark them with green dots.

# Initialize Mediapipe Face Mesh (We focus on the face)
mp_face_mesh = mp.solutions.face_mesh

# Capture video from the webcam
cap = cv2.VideoCapture(0)

try:
    # The context manager closes FaceMesh on exit, including when the loop
    # raises or the kernel is interrupted.
    with mp_face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Face Mesh is fed an RGB copy; drawing is done on the BGR frame
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            resultados = face_mesh.process(frame_rgb)

            if resultados.multi_face_landmarks:
                # The frame size is the same for every face: read it once
                h, w = frame.shape[:2]

                for face_landmarks in resultados.multi_face_landmarks:
                    # Get key points of the eyes
                    left_eye = face_landmarks.landmark[33]
                    right_eye = face_landmarks.landmark[263]

                    # Convert to absolute (pixel) coordinates
                    left_eye_coords = (int(left_eye.x * w), int(left_eye.y * h))
                    right_eye_coords = (int(right_eye.x * w), int(right_eye.y * h))

                    # Draw the eyes on the image
                    cv2.circle(frame, left_eye_coords, 3, (0, 255, 0), -1)
                    cv2.circle(frame, right_eye_coords, 3, (0, 255, 0), -1)

            cv2.imshow("Face Mesh - Eyes", frame)

            # Press 'q' to exit
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Always free the camera and the window, even after an error
    cap.release()
    cv2.destroyAllWindows()


## Eye Tracking

In [19]:
"""
Captures video from the webcam, processes each frame using MediaPipe Face Mesh (with refine_landmarks enabled),
and draws:
    - Eye reference points (in green)
    - The 4 landmarks used to estimate the iris center (in red)
    - The estimated iris center (in blue)
"""

# Iris tracking: draws the eye reference points (green), the four iris
# contour landmarks of each eye (red) and the iris centre estimated as the
# mean of those four contour points (blue).

mp_face_mesh = mp.solutions.face_mesh

# With refine_landmarks=True the mesh gains iris landmarks. Per the MediaPipe
# face-mesh connection tables, 468 and 473 are the iris centres themselves and
# 469-472 / 474-477 are the four contour points around each of them. The
# centre is therefore averaged over the contour points only; mixing in the
# centre landmark and dropping one contour point would bias the estimate.
# "Left"/"right" follow the naming this notebook already uses for
# landmarks 33 and 263.
LEFT_IRIS_CONTOUR = range(469, 473)   # 469, 470, 471, 472
RIGHT_IRIS_CONTOUR = range(474, 478)  # 474, 475, 476, 477


def _to_pixels(landmarks, indices, width, height):
    """Return the (x, y) pixel position of each landmark listed in indices."""
    return [
        (int(landmarks[i].x * width), int(landmarks[i].y * height))
        for i in indices
    ]


def _centroid(points):
    """Return the integer mean position of a non-empty list of (x, y) points."""
    count = len(points)
    return (
        int(sum(p[0] for p in points) / count),
        int(sum(p[1] for p in points) / count),
    )


# Initialize video capture from the default webcam (index 0)
cap = cv2.VideoCapture(0)

try:
    # The context manager closes FaceMesh on exit, including when the loop
    # raises or the kernel is interrupted.
    with mp_face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
        refine_landmarks=True,  # Enables refined detection for the iris
    ) as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Convert frame from BGR to RGB (required format for MediaPipe)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)

            if results.multi_face_landmarks:
                # Frame dimensions, used to turn normalized coordinates
                # into pixels; identical for every detected face
                h, w = frame.shape[:2]

                for face_landmarks in results.multi_face_landmarks:
                    # ----------------------------------------------------
                    # Eye reference points (landmarks 33 and 263), in green
                    # ----------------------------------------------------
                    left_eye = face_landmarks.landmark[33]
                    right_eye = face_landmarks.landmark[263]
                    left_eye_coords = (int(left_eye.x * w), int(left_eye.y * h))
                    right_eye_coords = (int(right_eye.x * w), int(right_eye.y * h))

                    cv2.circle(frame, left_eye_coords, 3, (0, 255, 0), -1)
                    cv2.circle(frame, right_eye_coords, 3, (0, 255, 0), -1)

                    # ----------------------------------------------------
                    # Iris contour landmarks, in red
                    # ----------------------------------------------------
                    left_iris_points = _to_pixels(
                        face_landmarks.landmark, LEFT_IRIS_CONTOUR, w, h
                    )
                    right_iris_points = _to_pixels(
                        face_landmarks.landmark, RIGHT_IRIS_CONTOUR, w, h
                    )
                    for point in left_iris_points + right_iris_points:
                        cv2.circle(frame, point, 2, (0, 0, 255), -1)

                    # ----------------------------------------------------
                    # Estimated iris centres (pupils), in blue
                    # ----------------------------------------------------
                    left_iris_center = _centroid(left_iris_points)
                    right_iris_center = _centroid(right_iris_points)
                    cv2.circle(frame, left_iris_center, 3, (255, 0, 0), -1)
                    cv2.circle(frame, right_iris_center, 3, (255, 0, 0), -1)

            # Show the resulting frame with overlays
            cv2.imshow("Face Mesh - Eyes and Pupils", frame)

            # Press 'q' to exit
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Release resources and close windows, even after an error
    cap.release()
    cv2.destroyAllWindows()


## Face Mesh 3 points

In [20]:
# Track the two eye reference points (landmarks 33 and 263) plus the midpoint
# between them: eyes are drawn in green, the midpoint in blue.

# Initialize Mediapipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh

# Capture video from webcam
cap = cv2.VideoCapture(0)

try:
    # The context manager closes FaceMesh on exit, including when the loop
    # raises or the kernel is interrupted.
    with mp_face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Face Mesh is fed an RGB copy; drawing is done on the BGR frame
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            resultados = face_mesh.process(frame_rgb)

            if resultados.multi_face_landmarks:
                # The frame size is the same for every face: read it once
                h, w = frame.shape[:2]

                for face_landmarks in resultados.multi_face_landmarks:
                    # Get key points of the eyes
                    left_eye = face_landmarks.landmark[33]
                    right_eye = face_landmarks.landmark[263]

                    # Convert to absolute (pixel) coordinates
                    left_eye_coords = (int(left_eye.x * w), int(left_eye.y * h))
                    right_eye_coords = (int(right_eye.x * w), int(right_eye.y * h))

                    # Calculate the midpoint between the eyes
                    # (integer division keeps it a valid pixel position)
                    mid_eye = (
                        (left_eye_coords[0] + right_eye_coords[0]) // 2,
                        (left_eye_coords[1] + right_eye_coords[1]) // 2,
                    )

                    # Draw the key points
                    cv2.circle(frame, left_eye_coords, 3, (0, 255, 0), -1)
                    cv2.circle(frame, right_eye_coords, 3, (0, 255, 0), -1)
                    cv2.circle(frame, mid_eye, 3, (255, 0, 0), -1)  # Midpoint

            cv2.imshow("Gaze Tracking", frame)

            # Press 'q' to exit
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Always free the camera and the window, even after an error
    cap.release()
    cv2.destroyAllWindows()


## Following the Nose point
In the file "Image Useful\canonical_face_model_uv_visualization.png" you can see the index of each face landmark; the code below uses it to locate the nose (landmark 1).

In [24]:
# Follow the nose: mark face-mesh landmark 1 with a green dot on every
# webcam frame.

# Initialize Mediapipe Face Mesh (We focus on the face)
mp_face_mesh = mp.solutions.face_mesh

# Capture video from the webcam
cap = cv2.VideoCapture(0)

try:
    # The context manager closes FaceMesh on exit, including when the loop
    # raises or the kernel is interrupted.
    with mp_face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Face Mesh is fed an RGB copy; drawing is done on the BGR frame
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            resultados = face_mesh.process(frame_rgb)

            if resultados.multi_face_landmarks:
                # The frame size is the same for every face: read it once
                h, w = frame.shape[:2]

                for face_landmarks in resultados.multi_face_landmarks:
                    # Get the nose key point (landmark 1)
                    Nose = face_landmarks.landmark[1]

                    # Convert to absolute (pixel) coordinates
                    Nose_coords = (int(Nose.x * w), int(Nose.y * h))

                    # Draw the nose on the image
                    cv2.circle(frame, Nose_coords, 3, (0, 255, 0), -1)

            # Window title names the nose; this cell does not draw the eyes
            cv2.imshow("Face Mesh - Nose", frame)

            # Press 'q' to exit
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Always free the camera and the window, even after an error
    cap.release()
    cv2.destroyAllWindows()