# In [1]:
import cv2
import numpy as np
from openvino.runtime import Core
from openvino_detector_2022_3.model_api.performance_metrics import PerformanceMetrics
from time import perf_counter

# --- Model setup -------------------------------------------------------------
core = Core()  # Initialize OpenVINO API
detection_model_xml = "openvino_detector_2022_3/model_2022_3/face-detection-retail-0005.xml"
detection_model = core.read_model(model=detection_model_xml)
device = "CPU"  # if you have NCS2 use "MYRIAD"
compiled_model = core.compile_model(model=detection_model, device_name=device)
input_layer = compiled_model.input(0)  # first model input
output_layer = compiled_model.output(0)  # first model output
# The input shape is unpacked as (batch, channels, height, width); only H and W
# are used later, to resize each frame to the size the network expects.
N, C, H, W = input_layer.shape

# --- Video source ------------------------------------------------------------
source = 0  # 'videos/wonder_woman.mp4' # Load the video
cap = cv2.VideoCapture(source)
if not cap.isOpened():
    # Fail early with a clear message instead of crashing later on a None frame.
    cap.release()
    raise RuntimeError(f"Could not open video source: {source!r}")
metrics = PerformanceMetrics()

# --- Face-count bookkeeping --------------------------------------------------
face_count = 0  # faces detected in the current frame
max_faces_in_first_5_seconds = 0  # largest per-frame face count seen so far
start_time = perf_counter()  # reference point for the 5-second window

# Minimum detection confidence for a box to be counted and drawn.
CONFIDENCE_THRESHOLD = 0.76

# A single infer request is created once and reused for every frame instead of
# being re-created on each iteration.
request = compiled_model.create_infer_request()

# Most recent successfully read frame. Kept separate from the raw read result
# so a failed read does not replace the last good image with None.
frame = None

while True:  # Main loop
    ret, grabbed_frame = cap.read()
    if not ret:
        break  # end of stream or read failure; `frame` keeps the last good image
    frame = grabbed_frame

    current_time = perf_counter()
    elapsed_time = current_time - start_time
    if elapsed_time >= 5:
        break  # only the first 5 seconds are analysed

    # Pre-process: resize to the network input size, reorder HWC -> CHW and
    # add a leading batch axis.
    resized_image = cv2.resize(src=frame, dsize=(W, H))
    input_data = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0).astype(np.float32)

    request.infer(inputs={input_layer.any_name: input_data})  # Infer
    result = request.get_output_tensor(output_layer.index).data

    bboxes = []  # pixel-space boxes [xmin, ymin, xmax, ymax] for this frame
    frame_height, frame_width = frame.shape[:2]

    # Reset face count to 0 for each new frame
    face_count = 0

    # Each detection row is indexed as: [2] confidence, [3..6] box corners.
    # The corners are scaled by the frame size here, so they are presumably
    # normalised to the 0..1 range by the model.
    for detection in result[0][0]:
        conf = float(detection[2])
        if conf <= CONFIDENCE_THRESHOLD:
            continue

        xmin = int(detection[3] * frame_width)
        ymin = int(detection[4] * frame_height)
        xmax = int(detection[5] * frame_width)
        ymax = int(detection[6] * frame_height)
        bboxes.append([xmin, ymin, xmax, ymax])

        # Count the face before labelling it so the labels run 1..n rather
        # than starting at 0.
        face_count += 1

        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (243, 255, 69), 3)
        cv2.putText(frame, f"Faces: {face_count}", (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                    (255, 255, 255), 2)

    max_faces_in_first_5_seconds = max(max_faces_in_first_5_seconds, face_count)

    metrics.update(current_time, frame)
    cv2.imshow('person detection demo', frame)
    key = cv2.waitKey(1)
    if key in {ord('q'), ord('Q'), 27}:  # q, Q or Esc quits
        break

# Display max faces in first 5 seconds on the screen, then release resources.
# The cleanup lives in `finally` so the capture device and windows are released
# even if drawing or showing the summary raises.
try:
    if frame is not None:
        cv2.putText(frame, f"Max faces in first 5 seconds: {max_faces_in_first_5_seconds}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.imshow('person detection demo', frame)
        cv2.waitKey(0)  # wait for a key press before closing the window
    else:
        # No image is available to draw on (the last read returned no frame);
        # report the result on stdout instead of crashing in cv2.putText.
        print(f"Max faces in first 5 seconds: {max_faces_in_first_5_seconds}")
finally:
    cap.release()
    cv2.destroyAllWindows()
