# In [None]:
import cv2
import numpy as np

# --- Configuration ---
# Detections whose best class score does not exceed this value are discarded.
CONFIDENCE_THRESHOLD = 0.5
# Threshold handed to Non-Maximum Suppression (NMS) when pruning overlapping boxes.
NMS_THRESHOLD = 0.4
# Class labels (as spelled in the class-names file) that this script treats as vehicles.
VEHICLE_CLASSES = [
    'bicycle',
    'car',
    'motorbike',
    'bus',
    'truck',
]
# Location of the input video; change this to point at your own file.
VIDEO_PATH = 'cars.mp4'
# --- End of Configuration ---

# Load the YOLOv3 network (weights + config), pre-trained on the COCO dataset
print("Loading YOLOv3 model...")
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Read the class labels, one per line. A label's position in this list is the
# class id that the detection code uses to index into `classes`.
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Resolve the names of the network's output layers (the layers whose outputs
# are not consumed by any other layer).
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV release they come back either as a flat array or as an (N, 1) array,
# so flatten first to get plain scalars in both cases before indexing.
output_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layers = [layer_names[int(i) - 1] for i in output_layer_ids]

print("Model loaded successfully. Starting video processing...")

# Open the video file
cap = cv2.VideoCapture(VIDEO_PATH)

if not cap.isOpened():
    print(f"Error: Could not open video file at {VIDEO_PATH}")
    # Raise SystemExit directly rather than calling exit(): the exit() helper is
    # injected by the `site` module and is not guaranteed to exist, and calling
    # it without an argument reports success (status 0) even though we failed.
    raise SystemExit(1)

# Drawing settings that do not change between frames.
font = cv2.FONT_HERSHEY_PLAIN
# One random color per class, generated once up front so a given class keeps
# the same color for the whole video instead of changing on every frame.
colors = np.random.uniform(0, 255, size=(len(classes), 3))
# Indices into `classes` that count as vehicles; computed once so the per-frame
# filter compares integer ids instead of looking up label strings.
vehicle_class_ids = [i for i, name in enumerate(classes) if name in VEHICLE_CLASSES]

try:
    while True:
        # Read a frame from the video
        ret, frame = cap.read()
        if not ret:
            break  # End of the video

        # Only height and width are needed; slicing also tolerates frames
        # that have no channel axis.
        height, width = frame.shape[:2]

        # --- Step 1: Detect Objects ---
        # Create a blob (a preprocessed image ready for the neural network)
        # resized to the 416x416 input size used with this YOLOv3 model.
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        # Forward pass to get the output from the output layers
        outs = net.forward(output_layers)

        # --- Step 2: Process Detections ---
        class_ids = []
        confidences = []
        boxes = []

        for out in outs:
            # Each row is one candidate detection: columns 0-3 hold the box
            # (center x, center y, width, height) as fractions of the frame
            # size, and columns 5 onward hold the per-class scores.
            scores = out[:, 5:]
            best_ids = scores.argmax(axis=1)
            best_scores = scores[np.arange(scores.shape[0]), best_ids]

            # Keep only confident detections whose best class is a vehicle.
            keep = (best_scores > CONFIDENCE_THRESHOLD) & np.isin(best_ids, vehicle_class_ids)

            for detection, class_id, confidence in zip(out[keep], best_ids[keep], best_scores[keep]):
                # Scale the normalized box back to pixel coordinates
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Rectangle coordinates (top-left corner)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))

        # --- Step 3: Apply Non-Max Suppression (NMS) ---
        # NMS removes weaker, overlapping bounding boxes for the same object
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

        # --- Step 4: Draw Bounding Boxes on the Frame ---
        # NMSBoxes may return an empty tuple, a flat array or an (N, 1) array
        # depending on the OpenCV release and whether anything survived;
        # normalising to a flat array covers all three (empty -> no iterations).
        for i in np.asarray(indexes).flatten():
            x, y, w, h = boxes[i]
            class_id = class_ids[i]
            label = str(classes[class_id])
            confidence_score = confidences[i]
            # Hand OpenCV a plain tuple of Python floats rather than a NumPy row
            color = tuple(colors[class_id].tolist())

            # Draw rectangle and label on the frame
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = f"{label} {confidence_score:.2f}"
            cv2.putText(frame, text, (x, y - 5), font, 1.5, color, 2)

        # Display the resulting frame
        cv2.imshow("Vehicle Detection", frame)

        # Press 'q' to exit the video window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if processing a frame raised an exception
    cap.release()
    cv2.destroyAllWindows()

print("Processing finished.")