In [1]:
import cv2
import numpy as np
import os
import face_recognition


Load pre-trained YOLOv3 model
we load the pre-trained YOLOv3 model from its configuration file and weights file. This model will be used to detect objects in the video frames.

In [2]:
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

Load COCO class labels

We load the class labels from the COCO dataset that the YOLOv3 model was trained on. This will help us identify the type of objects detected by the model.



In [3]:
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]


Set the minimum probability threshold for detection

In [4]:
min_confidence = 0.5

We initialize empty lists to keep track of the unique facial encodings and labels for each detected person, and the total number of people detected in the video feed.S

In [5]:
# Initialize empty lists for person identification
known_encodings = []
known_labels = []
person_count = 0


We define a function detect_and_identify_people() to detect and identify people in each frame of the video feed. The function takes in a single argument frame which is a numpy array representing a single video frame.



In [6]:
# Define function for detecting and identifying people in a frame
def detect_and_identify_people(frame):
    global known_encodings, known_labels, person_count

    # Get the frame dimensions and construct a blob from the frame
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)

    # Set the input blob for the neural network
    net.setInput(blob)

    # Forward pass through the neural network to detect objects
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i-1] for i in net.getUnconnectedOutLayers()]
    outputs = net.forward(output_layers)

    # Initialize empty lists for detected persons
    boxes = []
    confidences = []

    # Loop over the outputs and find the detected persons
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            # Check if the detected object is a person and meets the minimum confidence threshold
            if classes[class_id] == 'person' and confidence > min_confidence:
                # Scale the bounding box coordinates to the original frame size
                box = detection[0:4] * np.array([w, h, w, h])
                (center_x, center_y, width, height) = box.astype('int')

                # Get the top-left corner coordinates of the bounding box
                x = int(center_x - (width/2))
                y = int(center_y - (height/2))

                # Add the bounding box coordinates and confidence score to the lists
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))

    # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)

    # Loop over the indices and draw the bounding boxes on the frame
    for i in indices.flatten():
        # Get the top-left corner coordinates of the bounding box and its width and height
        x, y = boxes[i][0], boxes[i][1]
        w, h = boxes[i][2], boxes[i][3]

        if x < 0:
            w = w + x
            x = 0
        if y < 0:
            h = h + y
            y = 0
        if x+w > frame.shape[1]:
            w = frame.shape[1] - x
        if y+h > frame.shape[0]:
            h = frame.shape[0] - y


        # Crop the detected person from the frame
        person = frame[y:y+h, x:x+w]
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        person_array = np.array(person)

        # Generate a unique label for the person based on their appearance
        encoding = face_recognition.face_encodings(person_array)

        if len(encoding) == 0:
            continue 
        
        try:
            matches = face_recognition.compare_faces(known_encodings, encoding, tolerance=0.6)
        except TypeError:
            print("matches failed")
            matches=[True]
        if True in matches:
            continue        
        label = 'person{}'.format(person_count)
        known_encodings.append(encoding)
        known_labels.append(label)

        # Save the person's image with their label as filename
        filename = '{}.jpg'.format(label)
        cv2.imwrite(filename, person)

        # Increment the person count
        person_count += 1


In [7]:
# Initialize the camera
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Unable to open the camera")

In [8]:
while True:
    # Capture a frame from the camera
    
    ret, frame = cap.read()
    if not ret:
        print("cap failed")
        break

    detect_and_identify_people(frame)
    # Display the frame
    cv2.imshow('frame', frame)

    # Exit the loop if the user presses the 'q' key
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

print(person_count)

# Release the camera and close the window
cap.release()
cv2.destroyAllWindows()

matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches failed
matches fa