In [2]:
import cv2
import torch
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import numpy as np

In [3]:
from pathlib import Path

import cv2
from tqdm import tqdm
from ultralytics import YOLO

# ---------------------------------------------------------------------------
# YOLOv8 video inference: read a video, draw detections on every frame,
# write the annotated frames to a new MP4 and (optionally) preview them live.
# ---------------------------------------------------------------------------

# Load YOLOv8 model (pretrained or custom model)
model = YOLO("yolov8n.pt")  # Replace "yolov8n.pt" with your custom model if available

# Input / output video paths
input_video_path = "./assets/test_video/OTV1.mp4"  # Replace with your video file path
output_video_path = "./output_video/inference_result.mp4"

# Set to False on headless kernels where OpenCV cannot open a GUI window.
show_preview = True

# Drawing settings shared by the box and its label.
BOX_COLOR = (0, 255, 0)        # BGR green
LABEL_TEXT_COLOR = (0, 0, 0)   # BGR black
LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX
LABEL_SCALE = 0.7
LABEL_THICKNESS = 2
LABEL_GAP = 10                 # pixels between the box top edge and the text baseline
FALLBACK_FPS = 30.0            # used when the container does not report a frame rate


def _draw_detection(frame, x1, y1, x2, y2, label):
    """Draw one bounding box and its caption onto ``frame`` in place.

    Args:
        frame: BGR image to draw on (modified in place).
        x1, y1, x2, y2: integer pixel corners of the bounding box.
        label: caption text rendered above the box on a filled background.
    """
    cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, 2)

    (text_w, text_h), baseline = cv2.getTextSize(
        label, LABEL_FONT, LABEL_SCALE, LABEL_THICKNESS
    )

    # putText anchors at the text's bottom-left (baseline) corner, so glyphs
    # extend *upwards* from text_y. Clamp so the caption stays inside the
    # frame when the box touches the top edge.
    text_y = max(y1 - LABEL_GAP, text_h + 5)

    # Background rectangle sized from the measured text so it sits exactly
    # behind the glyphs (ascent above the baseline, descent below it).
    cv2.rectangle(
        frame,
        (x1, text_y - text_h - 5),
        (x1 + text_w + 2, text_y + baseline),
        BOX_COLOR,
        cv2.FILLED,
    )
    cv2.putText(
        frame, label, (x1, text_y),
        LABEL_FONT, LABEL_SCALE, LABEL_TEXT_COLOR, LABEL_THICKNESS
    )


# Load the video and fail fast if it cannot be opened (wrong path, bad codec).
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    raise FileNotFoundError(f"Could not open input video: {input_video_path}")

out = None
try:
    # Get video properties
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Total frames in the video
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the fractional rate (e.g. 29.97) so the output does not drift from
    # the source; fall back when the container reports 0 or NaN.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = FALLBACK_FPS

    # VideoWriter fails silently when the target directory is missing, so
    # create it up front and verify the writer actually opened.
    Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise RuntimeError(f"Could not open output video for writing: {output_video_path}")

    # Some containers report no frame count; tqdm then shows a counter only.
    progress_total = frame_count if frame_count > 0 else None

    with tqdm(total=progress_total, desc="Processing Video", unit="frame") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run object detection; verbose=False suppresses per-frame logging
            results = model(frame, verbose=False)

            # Parse and visualize results
            for result in results:
                for box in result.boxes:
                    x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                    conf = float(box.conf[0])
                    cls = int(box.cls[0])
                    label = f"{model.names[cls]} {conf:.2f}"
                    _draw_detection(frame, x1, y1, x2, y2, label)

            # Write the frame with detections to the output video
            out.write(frame)
            pbar.update(1)

            # (Optional) Display the frame in real-time; press "q" to stop early
            if show_preview:
                cv2.imshow("YOLOv8 Inference", frame)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
finally:
    # Always release resources so the MP4 is finalised and file handles are
    # closed even if inference or drawing raised.
    cap.release()
    if out is not None:
        out.release()
    if show_preview:
        cv2.destroyAllWindows()


Processing Video: 100%|██████████| 1008/1008 [00:54<00:00, 18.59frame/s]
