# IMPORTING LIBRARIES

# In [1]:
import cv2
import numpy as np

# In [2]:
# Load the YOLOv3 network from its Darknet weights and configuration files
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
unconnected_layers = net.getUnconnectedOutLayers()

# The unconnected-layer indices are 1-based and may arrive either as a flat
# sequence or as an (N, 1) array, depending on the OpenCV build. Flattening
# and casting each entry to a plain int handles both shapes uniformly and
# avoids indexing the name list with a 1-element NumPy array.
output_layers = [
    layer_names[int(i) - 1]
    for i in np.asarray(unconnected_layers).reshape(-1)
]

# Read the class labels, one per line; a label's position is its class ID
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# In [None]:
# Detection settings
TARGET_CLASS = "car"          # only detections of this class are kept
CONFIDENCE_THRESHOLD = 0.5    # minimum class score for a detection to be considered
NMS_SCORE_THRESHOLD = 0.5     # score threshold passed to non-maxima suppression
NMS_IOU_THRESHOLD = 0.4       # overlap threshold passed to non-maxima suppression

# Resolve the target class ID once instead of on every detection.
# Raises ValueError up front if the class is missing from the label file.
target_class_id = classes.index(TARGET_CLASS)

# Load video from a file path; pass a device index instead (e.g., 0) to read from the default webcam
cap = cv2.VideoCapture("4K Road traffic video for object detection and tracking - free download now!.mp4")

try:
    # Continuously capture frames from the video source
    while cap.isOpened():
        # Read the next frame from the video
        ret, frame = cap.read()

        # If the frame was not captured successfully (end of video or error), break the loop
        if not ret:
            break

        # Only height and width are needed; slicing also tolerates frames without a channel axis
        height, width = frame.shape[:2]

        # Convert the frame into a blob (preprocessing step for YOLO): resize to 416x416,
        # scale pixel values by 1/255, and swap the R and B channels
        blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)

        # Pass the blob as input to the YOLO model
        net.setInput(blob)

        # Perform forward pass to get the output layers of the YOLO model (predictions)
        outs = net.forward(output_layers)

        # Detection results for this frame: class IDs, confidences, and bounding boxes
        class_ids = []
        confidences = []
        boxes = []

        # Loop over each output from the YOLO model
        for out in outs:
            # Loop over each detection in the output
            for detection in out:
                # Elements 0-3 are the box (center x, center y, width, height) and
                # element 4 is skipped here; per-class scores start at index 5
                scores = detection[5:]
                # Index of the class with the highest score
                class_id = np.argmax(scores)
                # Score of that best class
                confidence = scores[class_id]

                # Only keep sufficiently confident detections of the target class
                if confidence > CONFIDENCE_THRESHOLD and class_id == target_class_id:
                    # Box values are scaled by the frame size to get pixel coordinates
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Calculate the top-left corner of the bounding box
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    # Store the bounding box, confidence, and class ID
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Apply non-maxima suppression to remove overlapping bounding boxes with lower confidence
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)

        # Visit only the boxes that survived suppression. The result may be empty,
        # flat, or shaped (N, 1), so normalize it to a flat integer array first.
        for i in np.asarray(indexes, dtype=int).reshape(-1):
            # Get the coordinates of the bounding box
            x, y, w, h = boxes[i]
            # Get the class label (e.g., "car") for the detected object
            label = str(classes[class_ids[i]])
            # Draw the bounding box around the detected object in green
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Put the class label above the bounding box
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2)

        # Display the processed frame with bounding boxes and labels
        cv2.imshow("Video", frame)

        # Check if the user pressed the "q" key; if so, exit the loop
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture and close the windows, even if processing raised
    cap.release()
    cv2.destroyAllWindows()