## Imports

In [25]:
import numpy as np
import supervision as sv

from ultralytics import YOLO

## Inference pipeline

In [28]:
# Load YOLOv8 from the "s" (small) weights file -- yolov8s.pt, not the nano
# yolov8n.pt checkpoint.
model = YOLO("models/yolov8s.pt")
# Tracker that supplies the tracker_id values shown in the labels.
tracker = sv.ByteTrack()
# Annotators used by callback, applied in this order: boxes, labels, traces.
box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect, track, and annotate the objects in one video frame.

    Args:
        frame: Image passed to the model; annotations are drawn on a copy.
        _: Second positional argument supplied by the caller; unused here.

    Returns:
        The frame returned by the trace annotator, after boxes and labels
        have been drawn on it.
    """
    # Run the model and keep the first result it returns.
    results = model(frame)[0]
    # Convert to supervision detections, then pass them through the tracker.
    detections = tracker.update_with_detections(
        sv.Detections.from_ultralytics(results)
    )

    # One caption per detection: tracker ID, upper-cased class name, and
    # confidence expressed as a percentage.
    labels = []
    for class_id, tracker_id, confidence in zip(
        detections.class_id, detections.tracker_id, detections.confidence
    ):
        labels.append(
            f"Object #{tracker_id}; P({results.names[class_id].upper()}) = {confidence*100:.2f}%"
        )

    # The annotators receive a copy rather than the input frame itself.
    canvas = frame.copy()
    canvas = box_annotator.annotate(canvas, detections=detections)
    canvas = label_annotator.annotate(canvas, detections=detections, labels=labels)
    return trace_annotator.annotate(canvas, detections=detections)

# Applies a callback function on each frame in the source video
# and saves the result to a target file.
# The callback defined in this file takes (frame, int) and returns the
# annotated frame; the int argument is ignored by it.
sv.process_video(
    source_path="inputs/rizztopher.mp4",
    target_path="outputs/rizz_detected.mp4",
    callback=callback
)


0: 640x384 1 bed, 5.7ms
Speed: 0.7ms preprocess, 5.7ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 4.8ms
Speed: 0.9ms preprocess, 4.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 4.3ms
Speed: 0.9ms preprocess, 4.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 4.1ms
Speed: 0.9ms preprocess, 4.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 6.3ms
Speed: 0.7ms preprocess, 6.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 3.6ms
Speed: 0.8ms preprocess, 3.6ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 bed, 3.5ms
Speed: 