In [1]:
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Face Detection using MediaPipe

## Task Inputs

The Face Detector accepts an input of one of the following data types:

- Still images  
- Decoded video frames  
- Live video feed  

## Task Outputs

The Face Detector outputs the following results:  

- Bounding boxes for detected faces in an image frame.  
- Coordinates for 6 face landmarks for each detected face.  
### Available models
1. BlazeFace (short-range and full-range)
2. BlazeFace Sparse (full-range)

### Source
https://ai.google.dev/edge/mediapipe/solutions/vision/face_detector

This notebook uses the short-range model for now. The short-range model is based on the Single Shot Detector (SSD) convolutional network technique: https://arxiv.org/abs/1512.02325

In [27]:
# Model initialisation: look up the MediaPipe solution modules once and build
# a single detector instance that is reused for every frame.
mp_drawing = mp.solutions.drawing_utils
mp_face_detection = mp.solutions.face_detection

face_detection = mp_face_detection.FaceDetection(
    model_selection=0,  # the short-range model mentioned in the notes above
    min_detection_confidence=0.5,
)

def detect_faces(frame):
    """Detect faces in a BGR frame and draw each detection onto it in place.

    The frame is converted to RGB before being passed to the module-level
    ``face_detection`` model; the annotations are drawn on the original
    ``frame`` object. Nothing is returned.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    found = face_detection.process(rgb).detections

    # ``detections`` is falsy when no face was found, so fall back to an empty
    # tuple and let the loop simply not run.
    for face in found or ():
        mp_drawing.draw_detection(frame, face)

In [29]:
# Live face detection on the default webcam (device 0). Press 'q' to quit.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# if something inside the loop raises.
try:
    if not cap.isOpened():
        # Report and fall through to cleanup; calling exit() here would shut
        # the notebook session down instead of just ending this cell.
        print("error")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered, so `frame` is unusable: stop here
                # rather than passing it on to detect_faces().
                print("error falied to capture")
                break

            # MediaPipe CV tasks
            # (object detection could be added here)

            # Face detection: boxes/keypoints are drawn onto `frame` in place.
            detect_faces(frame)

            cv2.imshow('real-time video', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

# Face landmark detection
## Task inputs 	
The Face Landmarker accepts an input of one of the following data types:
- Still images
- Decoded video frames
- Live video feed


## Task outputs
The Face Landmarker outputs the following results:
- Bounding boxes for detected faces in an image frame
- A complete face mesh for each detected face, with blendshape scores denoting facial expressions and coordinates for facial landmarks.

## Model
The Face Landmarker uses a series of models to predict face landmarks:
- Face detection model: detects the presence of faces with a few key facial landmarks.
- Face mesh model: adds a complete mapping of the face. The model outputs an estimate of 478 3-dimensional face landmarks.
- Blendshape prediction model: receives the output of the face mesh model and predicts 52 blendshape scores, which are coefficients representing different facial expressions.

## configurations https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker#models

## source https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker

In [7]:
# Live face-mesh overlay on the default webcam (device 0). Press 'q' to quit.

# Initialize the model
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# One thin spec, used for both the landmark points and the connecting lines.
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# if something inside the loop raises.
try:
    if not cap.isOpened():
        # Report and fall through to cleanup; calling exit() here would shut
        # the notebook session down instead of just ending this cell.
        print("error")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered, so `frame` is unusable: stop here
                # rather than handing it to cv2.cvtColor().
                print("error falied to capture")
                break

            # The frame is converted from BGR to RGB before processing.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)

            # Draw the tesselation for every detected face onto the BGR frame.
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_drawing.draw_landmarks(
                        image=frame,
                        landmark_list=face_landmarks,
                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                        landmark_drawing_spec=drawing_spec,
                        connection_drawing_spec=drawing_spec
                    )

            cv2.imshow('real-time video', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

Trying some more options in Face landmarks detection

In [3]:
# Initialize MediaPipe Face Landmarker
# The model bundle is loaded from a relative path, so it must already be
# present in the working directory (download it from MediaPipe's official
# models).
model_path = 'face_landmarker.task'
base_options = python.BaseOptions(model_asset_path=model_path)

# Request blendshape scores and facial transformation matrices in addition to
# the landmarks, and limit detection to a single face.
options = vision.FaceLandmarkerOptions(
    num_faces=1,
    output_face_blendshapes=True,
    output_facial_transformation_matrixes=True,
    base_options=base_options,
)
detector = vision.FaceLandmarker.create_from_options(options)

Source for the visualisation helpers below: https://colab.sandbox.google.com/github/googlesamples/mediapipe/blob/main/examples/face_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Face_Landmarker.ipynb

In [5]:
# draw_landmarks 
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import matplotlib.pyplot as plt

def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected face mesh drawn on it.

    Args:
        rgb_image: Image array to annotate; it is copied, never modified.
        detection_result: Object exposing ``face_landmarks``, a sequence with
            one collection of landmarks (each having ``x``, ``y`` and ``z``)
            per detected face.

    Returns:
        The annotated copy. When ``face_landmarks`` is empty this is simply an
        unmodified copy of the input.
    """
    canvas = np.copy(rgb_image)

    for face in detection_result.face_landmarks:
        # Repackage the landmarks as a NormalizedLandmarkList proto, which is
        # what is handed to drawing_utils.draw_landmarks below.
        points = [
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in face
        ]
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend(points)

        # Three overlays drawn in this order: tesselation, contours, irises.
        # Each entry pairs a connection set with the factory for its style.
        mesh = mp.solutions.face_mesh
        styles = mp.solutions.drawing_styles
        layers = (
            (mesh.FACEMESH_TESSELATION,
             styles.get_default_face_mesh_tesselation_style),
            (mesh.FACEMESH_CONTOURS,
             styles.get_default_face_mesh_contours_style),
            (mesh.FACEMESH_IRISES,
             styles.get_default_face_mesh_iris_connections_style),
        )
        for connections, make_style in layers:
            solutions.drawing_utils.draw_landmarks(
                image=canvas,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=make_style(),
            )

    return canvas

def plot_face_blendshapes_bar_graph(face_blendshapes):
    """Show a horizontal bar chart of blendshape scores, one bar per category.

    Args:
        face_blendshapes: Sequence of objects exposing ``category_name`` and
            ``score``; they are plotted top to bottom in the given order.
    """
    labels = []
    values = []
    for category in face_blendshapes:
        labels.append(category.category_name)
        values.append(category.score)
    positions = range(len(labels))

    fig, ax = plt.subplots(figsize=(12, 12))
    bars = ax.barh(positions, values)
    ax.set_yticks(positions, labels)
    # Flip the axis so the first category ends up at the top of the chart.
    ax.invert_yaxis()

    # Write each score as text at the right-hand end of its bar.
    for value, rect in zip(values, bars.patches):
        ax.text(rect.get_x() + rect.get_width(), rect.get_y(), f"{value:.4f}", va="top")

    ax.set_xlabel('Score')
    ax.set_title("Face Blendshapes")
    plt.tight_layout()
    plt.show()

In [7]:
# Real-time processing with the Face Landmarker on the default webcam.
# Keys: 'q' quits, 'b' plots the blendshape scores of the first detected face.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# if detection or drawing raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert to RGB and process
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
        detection_result = detector.detect(mp_image)

        if detection_result.face_landmarks:
            # Draw landmarks on the RGB copy, then convert back for imshow.
            annotated_image = draw_landmarks_on_image(rgb_frame, detection_result)
            display_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
        else:
            # No face in this frame: show the raw frame. Without this branch
            # `display_image` would be undefined on a first faceless frame
            # and stale on later ones.
            display_image = frame

        cv2.imshow('Face Landmarks', display_image)

        # Poll the keyboard once per frame so a key press cannot be consumed
        # by one check and missed by the other.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        if key == ord('b') and detection_result.face_blendshapes:
            plot_face_blendshapes_bar_graph(detection_result.face_blendshapes[0])
finally:
    cap.release()
    cv2.destroyAllWindows()