In [None]:
!pip install torch torchvision torchaudio
!pip install opencv-python
!pip install ultralytics
!pip install tensorrt  


In [None]:
import torch
import cv2
import random
import time
import pathlib
from ultralytics import YOLO
import modules.utils as utils
from modules.autobackend import AutoBackend


In [None]:
# TensorRT inference function
def tensorrt_detection(model, source, image):
    """Run one frame through a TensorRT-backed model and return its detections.

    Args:
        model: Callable model exposing a ``names`` attribute; it is invoked
            on the preprocessed frame.
        source: Passed through unchanged to ``utils.postprocess``.
        image: The original frame, passed to ``utils.preprocess`` and again
            to ``utils.postprocess``.

    Returns:
        Tuple ``(cls, conf, box)`` taken from ``results[0].boxes``. ``cls`` and
        ``conf`` are always 1-D with one entry per detection and ``box`` is
        always 2-D with four columns, so the three can be zipped together
        for any number of detections (including zero or one).
    """
    im = utils.preprocess(image)
    preds = model(im)
    results = utils.postprocess(preds, im, image, model.names, source)
    boxes = results[0].boxes  # assumes an ultralytics-style Boxes object -- TODO confirm
    # Do not use squeeze() here: with exactly one detection it collapses
    # cls/conf to 0-d values and box to shape (4,), which breaks callers that
    # iterate detections with zip(cls, conf, box).
    cls = boxes.cls.reshape(-1)
    conf = boxes.conf.reshape(-1)
    box = boxes.xyxy.reshape(-1, 4)
    return cls, conf, box


In [None]:
# YOLOv8 detection function
def yolov8_detection(model, image, imgsz=640, conf_thres=0.5):
    """Run a YOLOv8 model on one image and return its detections as NumPy arrays.

    Args:
        model: Object exposing ``predict(image, imgsz=..., conf=..., verbose=...)``.
        image: Input forwarded unchanged to ``model.predict``.
        imgsz: Forwarded to ``model.predict`` as ``imgsz`` (default 640).
        conf_thres: Forwarded to ``model.predict`` as ``conf`` (default 0.5).

    Returns:
        Tuple ``(cls, conf, box)`` built from the first result's ``boxes``:
        ``cls`` cast to ``int``, ``conf`` and ``xyxy`` converted with ``.numpy()``.
    """
    results = model.predict(image, imgsz=imgsz, conf=conf_thres, verbose=False)
    # Move the first result to CPU before converting its fields to NumPy.
    first = results[0].cpu()
    detected = first.boxes
    box = detected.xyxy.numpy()
    conf = detected.conf.numpy()
    cls = detected.cls.numpy().astype(int)
    return cls, conf, box


In [None]:
# Main function to detect objects in videos
def detection(model_path, source, name):
    """Detect objects in every frame of a video and save the annotated result.

    A ``.engine`` model path is loaded through ``AutoBackend`` on ``cuda:0``
    with ``fp16=True`` and warmed up; any other path is loaded with ``YOLO``.
    Each frame is annotated with ``utils.draw_box`` and the running average
    FPS (``utils.draw_fps``). The annotated frames are then written with the
    XVID codec to the path returned by ``utils.get_save_path``.

    Args:
        model_path: Path to the model file; its suffix selects the backend.
        source: Video source handed to ``cv2.VideoCapture``.
        name: Passed to ``utils.get_save_path`` together with the source name.

    Raises:
        RuntimeError: If ``source`` cannot be opened or yields no frames.
    """
    file_extension = pathlib.Path(model_path).suffix
    use_tensorrt = file_extension == ".engine"
    if use_tensorrt:
        model = AutoBackend(model_path, device=torch.device('cuda:0'), fp16=True)
        model.warmup()  # TensorRT model warmup
    else:
        model = YOLO(model_path)  # Load YOLOv8 model
    label_map = model.names
    # One random colour per entry in label_map.
    COLORS = [[random.randint(0, 255) for _ in range(3)] for _ in label_map]

    video_cap = cv2.VideoCapture(source)
    try:
        if not video_cap.isOpened():
            raise RuntimeError(f"Could not open video source: {source}")
        total_frames = int(video_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Frames are buffered because the writer's frame rate is the average
        # FPS, which is only known after the last frame has been processed.
        video_frames, total_fps, frame_count, avg_fps = [], 0.0, 0, 0.0

        while video_cap.isOpened():
            ret, frame = video_cap.read()
            if not ret:
                break
            start = time.perf_counter()
            if use_tensorrt:
                cls, conf, box = tensorrt_detection(model, source, frame)
            else:
                cls, conf, box = yolov8_detection(model, frame)
            detection_output = list(zip(cls, conf, box))
            image_output = utils.draw_box(frame, detection_output, label_map, COLORS)
            frame_count += 1
            # Floor the elapsed time so a coarse clock can never divide by zero.
            elapsed = max(time.perf_counter() - start, 1e-9)
            total_fps += 1 / elapsed
            avg_fps = total_fps / frame_count
            image_output = utils.draw_fps(avg_fps, image_output)
            video_frames.append(image_output)
            print(f"({frame_count}/{total_frames}) Frames Processed")
    finally:
        # Release the capture handle even if inference or drawing fails.
        video_cap.release()

    if frame_count == 0:
        raise RuntimeError(f"No frames could be read from video source: {source}")

    print(f"Average FPS: {avg_fps}")

    # These must be two statements: get_save_path needs file_name, which a
    # single tuple assignment would read before it is bound.
    file_name = utils.get_name(source)
    save_path = utils.get_save_path(file_name, name)
    # A frame rate of 0 is invalid for the writer, so clamp slow runs to 1.
    output_fps = max(1, int(avg_fps))
    out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'XVID'), output_fps, (frame_width, frame_height))
    try:
        for frame in video_frames:
            out.write(frame)
    finally:
        out.release()
    print(f"Video saved to {save_path}")


In [None]:

# Run the pipeline on the sample clip; the ".engine" suffix selects the
# TensorRT branch inside detection().
detection(
    model_path="yolov8x.engine",
    source="inference/road.mp4",
    name="detection",
)

