In [24]:
import cv2
from ultralytics import YOLO
from collections import defaultdict

# Instantiate the YOLO detector from the 'yolo11n.pt' weights file
# (the weights are downloaded on first use when not present locally).
model = YOLO(model='yolo11n.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|███████████████████████████████████████████████████████████| 5.35M/5.35M [00:00<00:00, 21.4MB/s]


In [31]:
"""
class_list

 0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',

 The list continues, but this project focuses only on class indices
 {1, 2, 3, 5, 7}: bicycle, car, motorcycle, bus and truck.
 
"""
# Lookup from class index to class name (e.g. 2 -> 'car'), as listed above.
# Used later to turn each detection's class index into a readable label.
class_list = model.names



In [61]:
# Open the traffic stock video and fail fast if it cannot be read.
# cv2.VideoCapture does not raise for a missing or unreadable file; it returns
# a capture whose isOpened() is False, which would make the tracking loop
# below silently do nothing.
video_path = '../dataset/traffic_stock (2).mp4'
capture = cv2.VideoCapture(video_path)
if not capture.isOpened():
    raise OSError(f"Could not open video file: {video_path!r}")

In [62]:
# Track objects frame by frame, annotate each frame and show it in a window.
# The try/finally guarantees that the capture and the display window are
# released even if tracking or drawing raises part-way through the video
# (or the kernel is interrupted).
try:
    while capture.isOpened():
        ret, frame = capture.read()
        if not ret:
            # No frame returned: end of the video or a read failure.
            break

        # Each frame has to be tracked using YOLO; persist=True tells the
        # tracker that this frame continues the previous one.
        results = model.track(frame, persist=True)
        detections = results[0].boxes

        # `boxes.id` is None on frames where no track IDs are available, so it
        # must be checked before calling .int() on it. The drawing loop lives
        # inside this guard so that a frame without tracks never reuses the
        # previous frame's boxes (or hits a NameError on the very first frame).
        if detections is not None and detections.id is not None:
            # Get the detected boxes, their class indices, and track IDs
            boxes = detections.xyxy.cpu()
            track_ids = detections.id.int().cpu().tolist()
            class_indices = detections.cls.int().cpu().tolist()
            # Confidences are collected alongside the boxes but not drawn yet.
            confidences = detections.conf.cpu()

            # Loop through each detected object
            for box, track_id, class_idx, conf in zip(boxes, track_ids, class_indices, confidences):
                x1, y1, x2, y2 = map(int, box)

                # Mark the center of the bounding box
                cx = (x1 + x2) // 2
                cy = (y1 + y2) // 2
                cv2.circle(frame, (cx, cy), 4, (49, 245, 49), -1)

                class_name = class_list[class_idx]

                # Label just above the box, then the box outline itself
                cv2.putText(frame, f"Id: {track_id} {class_name}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 17, 255), 2)

        # Video display (frames without tracks are shown unannotated)
        cv2.imshow("YOLO tracking...", frame)

        # Key binding to quit --> q
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release all resources
    capture.release()
    cv2.destroyAllWindows()


0: 384x640 1 car, 1 truck, 79.6ms
Speed: 2.8ms preprocess, 79.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 1 truck, 93.5ms
Speed: 2.3ms preprocess, 93.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 1 truck, 69.8ms
Speed: 2.2ms preprocess, 69.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 68.5ms
Speed: 1.9ms preprocess, 68.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 65.8ms
Speed: 2.2ms preprocess, 65.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 2 trucks, 69.9ms
Speed: 2.4ms preprocess, 69.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 71.0ms
Speed: 2.2ms preprocess, 71.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 68.3ms
Speed: 2.2ms preprocess, 68.3ms 