Using Yolov5 Model

In [2]:
from ultralytics import YOLO
import cv2

# Load the YOLOv5 model
model = YOLO('yolov5s.pt')  # YOLOv5s is a small pre-trained model

# Open a video file or real-time camera
cap = cv2.VideoCapture(0)  # Use 0 for webcam or provide a video path
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Perform object detection
    results = model.predict(source=frame, save=False)

    # Draw bounding boxes
    annotated_frame = results[0].plot()

    # Display the frame
    cv2.imshow('YOLOv5 Detection', annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


cap.release()
cv2.destroyAllWindows()



PRO TIP  Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.


0: 480x640 1 person, 147.1ms
Speed: 0.0ms preprocess, 147.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 145.6ms
Speed: 0.0ms preprocess, 145.6ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 125.7ms
Speed: 0.0ms preprocess, 125.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 117.0ms
Speed: 0.0ms preprocess, 117.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 115.7ms
Speed: 0.0ms preprocess, 115.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 couch, 118.1ms
Speed: 2.3ms preprocess, 118.1ms inference, 0.0ms postprocess per

Using SSD Model 

Didn't worked with this model and configuration file because of compatibility issues with my OpenCV also the error "Const input blob for weights not found" occurs when the .pb file is incomplete or not frozen properly (e.g., it doesn’t include trained weights).

In [7]:
import cv2
import numpy as np

# Load the pre-trained SSD model and the configuration files
model_path = r"C:\Users\Vibha Narayan\Downloads\frozen_inference_graph.pb"  # Path to the pre-trained SSD model
config_path = r"C:\Users\Vibha Narayan\Downloads\ssd_mobilenet_v2_coco_2018_03_29.pbtxt"  # Configuration file
net = cv2.dnn.readNetFromTensorflow(model_path, config_path)

# Load class labels for COCO dataset
with open("coco.names", "r") as f:
    class_labels = f.read().strip().split("\n")

# Set input video source (0 for real-time camera, or path for a pre-recorded video)
video_source = 0  # Replace with video file path for pre-recorded video
cap = cv2.VideoCapture(video_source)

# Define output video writer (for saving the result)
output_path = "output_ssd_detection.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Detection threshold
confidence_threshold = 0.5

# Process video frame by frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Prepare the frame for the SSD model
    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1/127.5, (300, 300), (127.5, 127.5, 127.5), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass through the model
    detections = net.forward()

    # Loop through the detections
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > confidence_threshold:
            class_id = int(detections[0, 0, i, 1])  # Class ID of the detected object
            class_name = class_labels[class_id] if class_id < len(class_labels) else "Unknown"

            # Get bounding box coordinates
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            x_min, y_min, x_max, y_max = box.astype("int")

            # Draw bounding box and label
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            label = f"{class_name}: {confidence:.2f}"
            cv2.putText(frame, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Write frame to output video
    out.write(frame)

    # Display the result
    cv2.imshow("SSD Object Detection", frame)

    # Break loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\tensorflow\tf_importer.cpp:2885: error: (-2:Unspecified error) Const input blob for weights not found in function 'cv::dnn::dnn4_v20240521::`anonymous-namespace'::TFImporter::getConstBlob'


Correct working SSD300 Model

The .caffemodel file provides weights pre-trained on datasets like Pascal VOC, enabling the model to detect a wide range of object classes without additional training.

In [2]:
import cv2
import numpy as np

# Load the pre-trained SSD model and configuration
model_path = r"C:\Users\Vibha Narayan\Downloads\MobileNetSSD_deploy.caffemodel"
config_path = r"C:\Users\Vibha Narayan\Downloads\MobileNetSSD_deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(config_path, model_path)

# Define class labels for objects detectable by the SSD model
class_labels = [
    "background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

# Open video capture (0 for webcam or replace with file path for pre-recorded video)
cap = cv2.VideoCapture(0)  # Use a pre-recorded video file path if needed

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Get dimensions of the frame
    height, width = frame.shape[:2]

    # Preprocess the frame for SSD
    blob = cv2.dnn.blobFromImage(frame, 0.007843, (300, 300), 127.5)
    net.setInput(blob)
    detections = net.forward()

    # Loop over detections
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:  # Confidence threshold
            idx = int(detections[0, 0, i, 1])  # Class index
            label = class_labels[idx]
            box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
            (startX, startY, endX, endY) = box.astype("int")

            # Draw bounding box and label
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
            text = f"{label}: {confidence:.2f}"
            cv2.putText(frame, text, (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow("SSD Object Detection", frame)

    # Break the loop on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
