In [1]:
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator

In [2]:
# Build the YOLO detector from "yolov9c.pt" (presumably a pretrained
# weights file — confirm). This module-level `model` is the object the
# frame-processing cell later passes to detect().
model = YOLO("yolov9c.pt")

In [3]:
def predict(model, img, classes=[], conf=0.5):
    """Run ``model.predict`` on ``img`` and return its result unchanged.

    Args:
        model: Object exposing a ``predict(img, **kwargs)`` method.
        img: Input forwarded as the first positional argument.
        classes: Optional class filter. It is forwarded as the ``classes``
            keyword only when truthy; an empty value omits the keyword.
        conf: Value always forwarded as the ``conf`` keyword.

    Returns:
        Whatever ``model.predict`` returns.
    """
    # Include the class filter only when one was actually requested.
    options = {"classes": classes, "conf": conf} if classes else {"conf": conf}
    return model.predict(img, **options)

In [4]:
def detect(model, img, classes = [], conf = 0.5):
    """Run detection on ``img`` and draw a labelled box for every detection.

    Args:
        model: Detector handed to ``predict``; its ``names`` mapping turns a
            class index into the label text.
        img: Image passed to both ``predict`` and ``Annotator``
            (presumably a BGR numpy frame from OpenCV — confirm with callers).
        classes: Optional class filter forwarded to ``predict``.
        conf: Confidence threshold forwarded to ``predict``.

    Returns:
        The annotated image as reported by ``Annotator.result()``.
    """
    results = predict(model, img, classes, conf=conf)

    # A single annotator is enough: every result is drawn onto the same image.
    annotator = Annotator(img)

    for r in results:
        for box in r.boxes:
            b = box.xyxy[0]  # first (only) row of corner coordinates for this box
            c = box.cls
            annotator.box_label(b, model.names[int(c)])

    # Take the pixels from the annotator instead of assuming it drew on `img`
    # in place; NOTE(review): Annotator may work on an internal copy for some
    # inputs (e.g. read-only arrays), in which case `img` stays unannotated.
    return annotator.result()

In [None]:
# Annotate every frame of the input video and write the result to a new file.
video_path = "videos/NY2.mp4"
output_path = "videos/output.mp4"

cap = cv2.VideoCapture(video_path)
out = None  # created only after the input has been opened successfully

try:
    # Fail loudly instead of silently skipping the loop and leaving a
    # broken/empty output file behind.
    if not cap.isOpened():
        raise IOError(f"Could not open input video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4 video

    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        success, img = cap.read()

        # No more frames (or a read error): stop processing.
        if not success:
            break

        result_img = detect(model, img, classes=[], conf=0.5)

        # Keep every written frame at the size the writer was created with.
        if result_img.shape[:2] != (frame_height, frame_width):
            result_img = cv2.resize(result_img, (frame_width, frame_height))

        out.write(result_img)
finally:
    # Always release the handles, even if detection fails or the run is
    # interrupted, so the output file is finalized and nothing stays locked.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()


0: 384x640 6 persons, 5 cars, 1 truck, 1 backpack, 1020.0ms
Speed: 11.4ms preprocess, 1020.0ms inference, 16.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 5 cars, 2 backpacks, 863.2ms
Speed: 10.6ms preprocess, 863.2ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 5 cars, 1 truck, 2 backpacks, 811.0ms
Speed: 5.0ms preprocess, 811.0ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 5 cars, 1 truck, 2 backpacks, 919.2ms
Speed: 6.6ms preprocess, 919.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 5 cars, 1 truck, 2 backpacks, 1005.6ms
Speed: 6.5ms preprocess, 1005.6ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 5 cars, 2 trucks, 2 backpacks, 1122.4ms
Speed: 5.5ms preprocess, 1122.4ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 5 cars, 2 trucks, 2 bac