In [None]:
# importing required libraries
import cv2
import numpy as np

In [None]:
# Building the YOLO network from its weights file and its configuration file
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Collecting the class names, one per line of "coco.names", in file order.
# The position of a name in 'classes' is the class ID it is looked up by later.
classes = []
with open("coco.names", "r") as names_file:
    for raw_line in names_file:
        # Dropping the trailing newline and any surrounding whitespace
        classes.append(raw_line.strip())


In [None]:
# Loading the video
video_path = "production_id_4791196.mp4"
cap = cv2.VideoCapture(video_path)

# Failing fast when the video cannot be opened (wrong path, unsupported codec).
# Without this check the properties read below come back as 0 and the rest of
# the notebook silently produces an empty output video.
if not cap.isOpened():
    raise OSError("Could not open video file: " + video_path)

# Getting the frames per second (fps) of the video
fps = cap.get(cv2.CAP_PROP_FPS)

# Getting the width of the frames in the video
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

# Getting the height of the frames in the video
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))


In [None]:
# Selecting the XVID codec through its four-character code
fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

# Opening "annotated.avi" for writing the annotated frames.
# The writer reuses the source video's frame rate and frame size.
frame_size = (width, height)
output_video = cv2.VideoWriter("annotated.avi", fourcc, fps, frame_size)


In [None]:
# Annotating the video frame by frame: every frame is passed through the
# network, confident 'person' and 'bicycle' detections get a green bounding
# box, and the frame is written to the output video and shown in a preview
# window. Pressing 'Esc' in the preview window stops processing early.

# Labels that get a bounding box, and the class score a detection must exceed
target_classes = ('person', 'bicycle')
confidence_threshold = 0.9

# The output layer names depend only on the network, so they are looked up
# once here instead of on every frame
output_layers = net.getUnconnectedOutLayersNames()

# Factors that scale the first four values of a detection to the frame size
box_scale = np.array([width, height, width, height])

# try/finally guarantees that the capture, the writer and the preview window
# are released even if processing fails or the cell is interrupted part-way
try:
    # Looping for as long as the video capture stays open
    while cap.isOpened():

        # Reading the next frame from the video capture
        ret, frame = cap.read()

        # Breaking the loop if there are no more frames
        if not ret:
            break

        # Converting the frame to a 416x416 blob for the network; pixel values
        # are scaled by 0.00392 (roughly 1/255), no mean is subtracted, and the
        # positional True is blobFromImage's swapRB flag
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

        # Setting the input blob and performing a forward pass
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Extracting information from the network's output
        for out in outs:
            for detection in out:

                # Values from index 5 onwards are treated as per-class scores;
                # the best-scoring class and its score describe the detection
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                # Keeping only confident detections of the target classes
                if confidence > confidence_threshold and classes[class_id] in target_classes:

                    # Scaling the box centre and size to frame pixels; tolist()
                    # yields plain Python ints, which cv2.rectangle accepts on
                    # every OpenCV build (NumPy integers are rejected by some)
                    center_x, center_y, w, h = (detection[0:4] * box_scale).astype('int').tolist()

                    # Converting the box centre to its top-left corner
                    x, y = int(center_x - w/2), int(center_y - h/2)

                    # Drawing a green bounding box around the detected object
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Writing the frame with bounding boxes to the output video
        output_video.write(frame)

        # Displaying the annotated video frame in a separate window
        cv2.imshow('Pre-Annotated Video', frame)

        # Checking for the 'Esc' key press (key code 27) to exit the loop
        if cv2.waitKey(1) == 27:
            break
finally:
    # Releasing the video capture and output video, and closing the window
    cap.release()
    output_video.release()
    cv2.destroyAllWindows()