In [1]:
import os
import numpy as np
import cv2
from ultralytics import YOLO

In [43]:
# capture the video from the specified path.
video_path = os.path.join(".", "videos", "people.mp4")
cap = cv2.VideoCapture(video_path)

# extract the frames per second (fps) and frame size from the video.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# create a VideoWriter object to save the output video.
out = cv2.VideoWriter(
    os.path.join(".", "videos", "output.mp4"),
    cv2.VideoWriter_fourcc(*"mp4v"),
    fps,
    (frame_width, frame_height)
    )

# create a YOLOv8 model
model = YOLO("yolov8n.pt") 

In [19]:
class_names = model.names

In [28]:
from deep_sort.deep_sort import DeepSort

deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'
tracker = DeepSort(model_path=deep_sort_weights, max_age=70)

In [35]:
colors = [
    (255, 0, 0),       # Red
    (0, 255, 0),       # Green
    (0, 0, 255),       # Blue
    (255, 255, 0),     # Yellow
    (255, 0, 255),     # Magenta
    (0, 255, 255),     # Cyan
    (255, 165, 0),     # Orange
    (128, 0, 128),     # Purple
    (0, 128, 128),     # Teal
    (128, 128, 0),     # Olive
]


In [44]:
unique_track_ids = set()

while cap.isOpened():

    ret, frame = cap.read()
    if not ret:
        break

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(frame_rgb, conf=0.5, iou=0.5)

    bbox_xywh = []
    confs = []

    for result in results:
        boxes = result.boxes
        for box in boxes:

            x1, y1, x2, y2, conf, cls = box.data[0].tolist()
            label = model.names[int(cls)]

            if label =="person":
                # x_center, y_center, width, height forat for deepsort tracker.
                xywh = box.xywh[0].cpu().numpy()  
                conf = conf

                bbox_xywh.append(xywh)
                confs.append(conf)
    
    bbox_xywh = np.array(bbox_xywh)
    confs = np.array(confs)

    # update the tracker with the new detections
    outputs = tracker.update(bbox_xywh, confs, frame_rgb)

    for output in outputs:

        x1, y1, x2, y2, track_id = map(int, output[:5])
        label = f"ID: {track_id}"
        unique_track_ids.add(track_id)

        # draw the bounding box and label on the frame
        cv2.rectangle(frame, (x1, y1), (x2, y2), colors[track_id % len(colors)], 2)
        cv2.putText(
            frame, label, (x1, y1 - 10), 
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[track_id % len(colors)], 2
            )
        
    person_count = len(unique_track_ids)

    cv2.putText(
        frame, f"Count: {person_count}", (10, 30), 
        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2
        )
    
    cv2.imshow("YOLOv8 Detection", frame)
    out.write(frame)
    
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
        

cap.release()
out.release()
cv2.destroyAllWindows()


0: 384x640 21 persons, 1 bird, 65.8ms
Speed: 6.7ms preprocess, 65.8ms inference, 7.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 1 bird, 14.0ms
Speed: 2.8ms preprocess, 14.0ms inference, 5.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 1 bird, 10.1ms
Speed: 3.6ms preprocess, 10.1ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 1 bird, 11.3ms
Speed: 3.7ms preprocess, 11.3ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 2 birds, 15.1ms
Speed: 2.2ms preprocess, 15.1ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 1 bird, 13.1ms
Speed: 2.9ms preprocess, 13.1ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 2 birds, 12.5ms
Speed: 2.3ms preprocess, 12.5ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 2 birds, 9.4ms
Speed: 