# Face Detection

### 1. Using Haar Cascades in OpenCV

Haar cascades are a machine learning-based object detection method that predates the widespread use of deep learning. They are a cascade of classifiers that use a series of simple image features to identify objects or regions of interest in images.

In [7]:
import cv2
import time

# Load the pre-trained Haar cascade for frontal face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # A bad path yields an empty classifier that only fails later, inside
    # detectMultiScale, with a much less helpful error.
    raise RuntimeError("Failed to load haarcascade_frontalface_default.xml")

# Alternative: pre-trained Haar cascade for full body detection.
# Poor detection in practice: only detects when the full body is in the camera frame.
# face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_fullbody.xml')

# Initialize the camera (assuming Raspberry Pi camera module)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open camera at index 0")

# Size the writer from the capture itself so the frames passed to out.write()
# match the size the writer was opened with. Fall back to 640x480 if the
# backend reports 0.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

# Define the codec and create a VideoWriter object for MP4.
# Lower-case 'mp4v' is the FourCC tag used for MPEG-4 video in MP4 files.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Output file: 'output_haar.mp4', nominal FPS: 20.0
out = cv2.VideoWriter('output_haar.mp4', fourcc, 20.0, (frame_width, frame_height))

# Variables for FPS calculation (instantaneous, recomputed every frame)
fps_start_time = time.time()
fps = 0.0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera disconnected or stream ended: frame is None, so stop
            # instead of crashing in cvtColor.
            break

        # Convert the frame to grayscale (Haar cascades work on grayscale images)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Perform face detection
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Annotate every detected face
        for (x, y, w, h) in faces:
            # Bounding box; (x, y) is the upper-left corner of the box
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # Green dot at the center of the bounding box
            cv2.circle(frame, (x + w // 2, y + h // 2), 3, (0, 255, 0), -1)

            # Upper-left coordinates, drawn just above the rectangle
            cv2.putText(frame, f'x: {x}, y: {y}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Calculate FPS from the time elapsed since the previous frame
        fps_end_time = time.time()
        elapsed = fps_end_time - fps_start_time
        if elapsed > 0:
            # Guard against a zero interval on coarse clocks; keep the last value.
            fps = 1.0 / elapsed
        fps_start_time = fps_end_time

        # Overlay FPS on the frame
        cv2.putText(frame, "FPS: {:.2f}".format(fps), (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Display the frame with detected faces
        cv2.imshow('Face Detection', frame)

        # Write the frame to the output video
        out.write(frame)

        # Break the loop when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera, VideoWriter and close the OpenCV window even if the
    # loop is interrupted (exception, Ctrl-C / kernel interrupt), so the device
    # is freed and the MP4 file is finalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

On CPU, this runs at over 20 FPS.

### 2. Using cvlib's `detect_face`

The `cvlib.detect_face` function in the `cvlib` library uses a pre-trained deep learning model for face detection. The model is based on a Single Shot MultiBox Detector (SSD), which is a popular architecture for object detection tasks. It uses a pre-trained Caffe model with a modified ResNet backbone, `res10_300x300_ssd_iter_140000.caffemodel`.

In [5]:
import cv2
import cvlib
import time

# Initialize the camera (assuming default camera)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open camera at index 0")

# Size the writer from the capture itself so the frames passed to out.write()
# match the size the writer was opened with. Fall back to 640x480 if the
# backend reports 0.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

# Define the codec and create a VideoWriter object for MP4.
# Lower-case 'mp4v' is the FourCC tag used for MPEG-4 video in MP4 files.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Output file: 'output_resnet10.mp4', nominal FPS: 20.0
out = cv2.VideoWriter('output_resnet10.mp4', fourcc, 20.0, (frame_width, frame_height))

# Variables for FPS calculation (instantaneous, recomputed every frame)
fps_start_time = time.time()
fps = 0.0

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # Camera disconnected or stream ended: frame is None, so stop
            # instead of passing it to the detector.
            break

        # Perform face detection; the confidences are not used here
        faces, _confidences = cvlib.detect_face(frame)

        # Loop over detected faces and annotate them
        for (start_x, start_y, end_x, end_y) in faces:
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)

            # Alternative marker: white dot at the center of the bounding box
            # cv2.circle(frame, ((start_x + end_x) // 2, (start_y + end_y) // 2), 3, (255, 255, 255), -1)

            # Green dot at the approximate neck position: horizontally centered,
            # a quarter of the box height below the bottom edge of the box.
            neck_x = (start_x + end_x) // 2
            neck_y = end_y + int(0.25 * (end_y - start_y))
            cv2.circle(frame, (neck_x, neck_y), 3, (0, 255, 0), -1)

        # Calculate FPS from the time elapsed since the previous frame
        fps_end_time = time.time()
        elapsed = fps_end_time - fps_start_time
        if elapsed > 0:
            # Guard against a zero interval on coarse clocks; keep the last value.
            fps = 1.0 / elapsed
        fps_start_time = fps_end_time

        # Overlay FPS on the frame
        cv2.putText(frame, "FPS: {:.2f}".format(fps), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display the frame with detected faces and FPS
        cv2.imshow('Face Detection', frame)

        # Write the frame to the output video
        out.write(frame)

        # Break the loop when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera, VideoWriter and close the OpenCV window even if the
    # loop is interrupted (exception, Ctrl-C / kernel interrupt), so the device
    # is freed and the MP4 file is finalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

[[268, 150, 457, 410]]
[[267, 150, 455, 411]]
[[267, 149, 454, 411]]
[[267, 149, 453, 409]]
[[267, 147, 451, 409]]
[[266, 145, 451, 409]]
[[267, 146, 451, 409]]
[[267, 146, 452, 409]]
[[267, 145, 452, 409]]
[[267, 145, 452, 409]]
[[267, 145, 452, 409]]
[[267, 145, 452, 409]]
[[269, 142, 455, 407]]
[[270, 142, 455, 406]]
[[272, 142, 455, 404]]
[[273, 141, 455, 404]]
[[273, 140, 455, 404]]
[[274, 139, 455, 403]]
[[274, 140, 455, 403]]
[[274, 141, 455, 403]]
[[274, 143, 455, 403]]
[[273, 143, 455, 403]]
[[273, 143, 456, 403]]
[[273, 144, 456, 403]]
[[272, 143, 457, 403]]
[[273, 145, 457, 403]]
[[273, 145, 457, 404]]
[[275, 147, 459, 406]]
[[275, 148, 459, 405]]
[[276, 147, 459, 406]]
[[276, 147, 459, 407]]
[[275, 147, 460, 406]]
[[275, 148, 460, 406]]
[[276, 148, 460, 406]]
[[276, 148, 459, 407]]
[[276, 147, 460, 408]]
[[275, 149, 459, 409]]
[[275, 148, 461, 408]]
[[275, 149, 461, 407]]
[[276, 149, 461, 408]]
[[276, 150, 462, 408]]
[[276, 149, 462, 408]]
[[277, 148, 462, 409]]
[[277, 148,

On CPU, this also runs at over 20 FPS, and it detects faces better.

In [4]:
# Manual cleanup cell: free the capture device and the video writer, then
# close every OpenCV window.
# NOTE(review): presumably meant to be run by hand after a detection cell was
# interrupted before reaching its own cleanup -- confirm.
for handle in (cap, out):
    handle.release()
cv2.destroyAllWindows()