In [8]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import os

model_path = 'yolo11s.pt'  
model = YOLO(model_path)
model.to('cpu')

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3k2(
        (cv1): Conv(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_runnin

In [None]:
def count_faces_yolo(image_path: str, conf_threshold: int=0.5) -> int:
    """
    Detect and count faces in an image using YOLOv8.

    Parameters:
        image_path (str): Path to the input image.
        conf_threshold (float): Confidence threshold for detections.

    Returns:
        int: Number of faces detected.
    """

    results = model(image_path, conf=conf_threshold)
    boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
    
    return len(boxes)


In [10]:
def extract_frames(video_path, save_dir, fps_limit=10):
    cap = cv2.VideoCapture(video_path)
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    frame_interval = int(video_fps / fps_limit)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    count = 0
    saved = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if count % frame_interval == 0:
            num_faces = count_faces_yolo(frame)
            if num_faces != 1:
                continue
            resized = cv2.resize(frame, (640, 480))
            cv2.imwrite(os.path.join(save_dir, f'frame_{saved:04}.png'), resized)
            saved += 1
        count += 1
    cap.release()


In [11]:
extract_frames("./vids/welad_rizk.mp4", "./output_frames/welad_rizk", fps_limit=10)


0: 288x640 2 persons, 115.8ms
Speed: 1.8ms preprocess, 115.8ms inference, 0.8ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 2 persons, 100.4ms
Speed: 1.0ms preprocess, 100.4ms inference, 0.4ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 2 persons, 98.9ms
Speed: 0.9ms preprocess, 98.9ms inference, 0.4ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 2 persons, 102.3ms
Speed: 1.0ms preprocess, 102.3ms inference, 0.4ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 1 person, 96.4ms
Speed: 0.9ms preprocess, 96.4ms inference, 0.4ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 1 person, 99.5ms
Speed: 0.9ms preprocess, 99.5ms inference, 0.5ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 1 person, 98.7ms
Speed: 0.9ms preprocess, 98.7ms inference, 0.4ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 1 person, 99.3ms
Speed: 1.4ms preprocess, 99.3ms inference, 0.4ms postprocess per image at shape