# In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Real-time object detection on a webcam stream using a YOLO network loaded
# through OpenCV's dnn module. Press 'q' in the preview window to quit.

# Tunable thresholds (values unchanged from the original script)
CONF_THRESHOLD = 0.7   # minimum class score for a detection to be kept
SCORE_THRESHOLD = 0.5  # score threshold passed to non-maximum suppression
NMS_THRESHOLD = 0.4    # overlap threshold passed to non-maximum suppression
INPUT_SIZE = (320, 320)  # spatial size the frame is resized to for the network

# Load YOLO weights and configuration
yolo = cv2.dnn.readNet("./yolov3.weights", "./yolov3.cfg")

# Load the class names (one label per line)
with open("./coco.names", 'r') as f:
    classes = f.read().splitlines()

# These do not change between frames, so compute them once up front.
output_layers_names = yolo.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN
# One random colour per class, so a given class keeps the same colour from
# frame to frame instead of flickering.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Open a connection to the webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open webcam (device index 0)")

try:
    while True:
        # Capture a frame from the webcam; stop cleanly if the read fails
        # (camera unplugged, stream ended) instead of crashing on a None frame.
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        # Get the height and width of the frame
        height, width, _ = frame.shape

        # Preprocess the frame for YOLO: scale pixels to [0, 1], resize,
        # and swap the B and R channels.
        blob = cv2.dnn.blobFromImage(
            frame, 1/255, INPUT_SIZE, (0, 0, 0), swapRB=True, crop=False
        )

        # Set the input and run a forward pass through the network
        yolo.setInput(blob)
        layeroutput = yolo.forward(output_layers_names)

        # Detected bounding boxes, confidences, and class IDs for this frame
        boxes = []
        confidences = []
        class_ids = []

        # Iterate through each output layer
        for output in layeroutput:
            # Each row is one detection: 4 box values, 1 objectness value,
            # then the per-class scores (the rows are indexed that way below).
            scores = output[:, 5:]
            best_ids = np.argmax(scores, axis=1)
            best_scores = scores[np.arange(scores.shape[0]), best_ids]

            # Only visit rows that pass the confidence threshold; this avoids
            # a Python-level loop over every candidate row on every frame.
            for row in np.flatnonzero(best_scores > CONF_THRESHOLD):
                detection = output[row]

                # Scale the normalised box back to the original frame size
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Calculate the top-left corner of the bounding box
                x = int(center_x - w/2)
                y = int(center_y - h/2)

                boxes.append([x, y, w, h])
                confidences.append(float(best_scores[row]))
                class_ids.append(int(best_ids[row]))

        # Perform non-maximum suppression to remove redundant overlapping boxes
        indexes = cv2.dnn.NMSBoxes(
            boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD
        )

        # NMSBoxes may return an empty tuple when nothing survives, or an
        # N x 1 array in older OpenCV builds; np.asarray(...).flatten()
        # handles every variant, and the loop is simply skipped when empty.
        for i in np.asarray(indexes).flatten():
            i = int(i)
            x, y, w, h = boxes[i]
            class_id = class_ids[i]
            label = str(classes[class_id])
            confi = str(round(confidences[i], 2))
            # Plain Python floats are accepted as a colour by every OpenCV build.
            color = tuple(float(c) for c in colors[class_id])

            # Draw rectangle and label on the frame
            cv2.rectangle(frame, (x, y), (x+w, y+h), color, 3)
            cv2.putText(frame, label + " " + confi, (x, y+20), font, 2, (255, 255, 255), 3)

        # Display the frame with bounding boxes
        cv2.imshow('Webcam Object Detection', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the webcam and close the window, even on error or
    # Ctrl-C, so the camera device is not left locked.
    cap.release()
    cv2.destroyAllWindows()