In [20]:
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Face Detection using MediaPipe

## Task Inputs

The Face Detector accepts an input of one of the following data types:

- Still images  
- Decoded video frames  
- Live video feed  

## Task Outputs

The Face Detector outputs the following results:  

- Bounding boxes for detected faces in an image frame.  
- Coordinates for 6 face landmarks for each detected face.  

### Available models
1. BlazeFace (short-range and full-range)
2. BlazeFace Sparse

### Source
https://ai.google.dev/edge/mediapipe/solutions/vision/face_detector

This notebook uses the short-range model for now. The short-range model is based on the Single Shot Detector (SSD) convolutional network technique: https://arxiv.org/abs/1512.02325

In [27]:
# Model initialisation: short aliases for the MediaPipe solution modules,
# plus a single detector instance that is created once and reused.

mp_drawing = mp.solutions.drawing_utils
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(
    min_detection_confidence=0.5,
    model_selection=0,
)

def detect_faces(frame):
    """Detect faces in ``frame`` and draw each detection onto it.

    The frame is converted with ``cv2.COLOR_BGR2RGB`` before being handed to
    the module-level ``face_detection`` object, so a BGR image is expected.
    Detections are drawn on ``frame`` itself via ``mp_drawing``; the function
    returns nothing.

    Args:
        frame: Image to analyse and annotate in place.
    """
    rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    found = face_detection.process(rgb_image).detections

    # Nothing is drawn when `detections` is falsy (no faces reported).
    for face in found or ():
        mp_drawing.draw_detection(frame, face)

In [29]:
# Real-time face detection: read frames from the webcam (device index 0),
# annotate them with detect_faces() and show them until 'q' is pressed.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Raise instead of calling exit(): exit() is meant for interactive shells
    # and would take the notebook kernel (and all its state) down with it.
    cap.release()
    raise RuntimeError("error: could not open the webcam (device index 0)")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # A failed read yields no usable frame; stop here instead of
            # passing it on to detect_faces()/imshow(), which would crash.
            print("error falied to capture")
            break

        # media pipe cv tasks

        # Object detection (not implemented yet)

        # face detection: detections are drawn directly onto `frame`
        detect_faces(frame)

        cv2.imshow('real-time video', frame)
        # Press 'q' in the video window to leave the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised
    # or the cell was interrupted.
    cap.release()
    cv2.destroyAllWindows()

# Face landmark detection
## Task inputs 	
The Face Landmarker accepts an input of one of the following data types:
- Still images
- Decoded video frames
- Live video feed


## Task outputs
The Face Landmarker outputs the following results:
- Bounding boxes for detected faces in an image frame
- A complete face mesh for each detected face, with blendshape scores denoting facial expressions and coordinates for facial landmarks.

## Model
The Face Landmarker uses a series of models to predict face landmarks:
- Face detection model: detects the presence of faces with a few key facial landmarks.
- Face mesh model: adds a complete mapping of the face. The model outputs an estimate of 478 3-dimensional face landmarks.
- Blendshape prediction model: receives the output from the face mesh model and predicts 52 blendshape scores, which are coefficients representing different facial expressions.

## configurations https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker#models

## source https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker

In [22]:
# Real-time face mesh: read frames from the webcam (device index 0), run the
# MediaPipe face mesh on each one and draw the tesselation until 'q' is pressed.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Raise instead of calling exit(): exit() is meant for interactive shells
    # and would take the notebook kernel (and all its state) down with it.
    cap.release()
    raise RuntimeError("error: could not open the webcam (device index 0)")

try:
    # initialize the model
    mp_drawing = mp.solutions.drawing_utils
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    )

    # One spec shared by landmark points and connection lines.
    drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

    while True:
        ret, frame = cap.read()
        if not ret:
            # A failed read yields no usable frame; stop here instead of
            # passing it to cvtColor(), which would crash.
            print("error falied to capture")
            break

        # The frame is converted BGR -> RGB before it is handed to the model.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(frame_rgb)

        # draw detections onto the original (BGR) frame
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    image=frame,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=drawing_spec,
                    connection_drawing_spec=drawing_spec
                )

        cv2.imshow('real-time video', frame)
        # Press 'q' in the video window to leave the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if model setup or the
    # loop raised, or the cell was interrupted.
    cap.release()
    cv2.destroyAllWindows()