In [3]:
from ultralytics import YOLO
import cv2

# Load the YOLOv5 model
model = YOLO("yolov5su.pt")  # Pretrained YOLO model

# Open a video file or webcam
cap = cv2.VideoCapture('vid.mp4')  # Pass 0 instead of the path to read from the webcam
out = None  # The writer is created only after the input has been validated

try:
    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel down
    if not cap.isOpened():
        raise IOError("Error: Could not open video.")

    # Get video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float so rates such as 29.97 are not truncated,
    # and fall back to 30 when the backend reports no usable rate.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # Define the codec and create VideoWriter object
    out = cv2.VideoWriter(
        'output_detected.mp4',
        cv2.VideoWriter_fourcc(*'mp4v'),  # Codec for .mp4 format
        fps,
        (frame_width, frame_height)
    )
    if not out.isOpened():
        raise IOError("Error: Could not open output video for writing.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("End of video or error reading the video.")
            break

        # Perform object detection
        results = model(frame)

        # Render detections on the frame
        annotated_frame = results[0].plot()

        # Write the frame to the output video
        out.write(annotated_frame)

        # Display the output (optional)
        cv2.imshow("YOLOv5 Object Detection", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and writer and close display windows, even when
    # the cell is interrupted (KeyboardInterrupt) or an error is raised mid-loop.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()



0: 384x640 8 cars, 1 truck, 488.9ms
Speed: 12.0ms preprocess, 488.9ms inference, 15.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 bus, 1 truck, 320.1ms
Speed: 5.0ms preprocess, 320.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 truck, 356.0ms
Speed: 4.3ms preprocess, 356.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 truck, 362.2ms
Speed: 5.0ms preprocess, 362.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 truck, 420.1ms
Speed: 5.0ms preprocess, 420.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 truck, 376.5ms
Speed: 5.0ms preprocess, 376.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 truck, 391.7ms
Speed: 6.0ms preprocess, 391.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 1 truck, 449.3ms
Speed: 3.

KeyboardInterrupt: 

In [4]:
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

# Fetch the pre-trained SSD MobileNet V2 detector from TensorFlow Hub,
# announcing progress before and after the load.
SSD_MODEL_URL = "https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2"

print("Loading model...")
model = hub.load(SSD_MODEL_URL)
print("Model loaded successfully.")

# Function to perform object detection
def detect_objects(frame):
    """Run the module-level ``model`` on a single frame.

    Args:
        frame: Image array convertible to a ``uint8`` tensor. The caller in
            this notebook passes an RGB frame.

    Returns:
        A ``(boxes, classes, scores)`` tuple of NumPy arrays taken from the
        first (and only) batch entry of the model output. ``classes`` is cast
        to ``int32``. The caller treats each box as normalized
        ``(y1, x1, y2, x2)`` coordinates.
    """
    # The model is called on a batch, so prepend a batch axis of size one.
    batch = tf.convert_to_tensor(frame, dtype=tf.uint8)[tf.newaxis, ...]
    outputs = model(batch)

    def first_in_batch(key):
        # Drop the batch axis and move the tensor to NumPy.
        return outputs[key][0].numpy()

    return (
        first_in_batch('detection_boxes'),
        first_in_batch('detection_classes').astype(np.int32),
        first_in_batch('detection_scores'),
    )

# Class-id -> name map for the COCO dataset, using the sparse 1..90 id scheme
# of the TensorFlow Object Detection API label map (ids 12, 26, 29, 30, 45, 66,
# 68, 69, 71 and 83 are unused). Having all 80 classes here keeps confident
# non-vehicle detections from being drawn as 'Unknown'.
LABELS = {
    1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus',
    7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant',
    13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat',
    18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear',
    24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag',
    32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard',
    37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove',
    41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
    46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl',
    52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli',
    57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake',
    62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', 67: 'dining table',
    70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote',
    76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven', 80: 'toaster',
    81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock', 86: 'vase',
    87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'
}

# Minimum detection score a box needs in order to be drawn
CONFIDENCE_THRESHOLD = 0.45

# Open video file or capture from webcam
video_input = cv2.VideoCapture("vid.mp4")  # Pass 0 instead of the path to read from the webcam
video_output = None  # The writer is created only after the input has been validated

try:
    # Fail loudly: otherwise the loop below is skipped without any message and
    # an output file is still created for a video that was never read.
    if not video_input.isOpened():
        raise IOError("Error: Could not open video.")

    width = int(video_input.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video_input.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float so rates such as 29.97 are not truncated,
    # and fall back to 30 when the backend reports no usable rate.
    fps = video_input.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # Set up video writer
    video_output = cv2.VideoWriter('output_with_detections.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    if not video_output.isOpened():
        raise IOError("Error: Could not open output video for writing.")

    # Factors that turn normalized (y1, x1, y2, x2) boxes into pixel coordinates
    box_scale = np.array([height, width, height, width])

    while video_input.isOpened():
        ret, frame = video_input.read()
        if not ret:
            print("End of video or error reading the video.")
            break

        # Convert frame to RGB for the model
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Perform object detection
        boxes, classes, scores = detect_objects(rgb_frame)

        # Draw detections on the frame
        for box, class_id, score in zip(boxes, classes, scores):
            if score <= CONFIDENCE_THRESHOLD:
                continue

            # Plain Python ints keep the OpenCV drawing calls independent of NumPy scalar types
            y1, x1, y2, x2 = (box * box_scale).astype(int).tolist()

            label = LABELS.get(int(class_id), 'Unknown')
            score_text = f"{label}: {score:.2f}"

            # Draw bounding box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Keep the caption inside the frame when the box touches the top edge
            label_y = max(y1 - 10, 15)
            cv2.putText(frame, score_text, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Write the frame to the output video
        video_output.write(frame)

        # Display the frame with detections (optional)
        cv2.imshow('Object Detection', frame)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release resources, even when the cell is interrupted
    # (KeyboardInterrupt) or an error is raised mid-loop.
    video_input.release()
    if video_output is not None:
        video_output.release()
    cv2.destroyAllWindows()


Loading model...
Model loaded successfully.


KeyboardInterrupt: 

KeyboardInterrupt: 