In [1]:
import cv2
import numpy as np
from ultralytics import YOLO

In [2]:
# Load the YOLO11 model
model = YOLO("yolo11n.pt")
# Lookup from numeric class id to class name; indexed with each detection's
# class id when the tracking loop builds its on-screen labels.
class_names = model.model.names

In [3]:
# Open the video file
video_path = "downtown.mp4"
cap = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the source cannot be opened; it only
# reports isOpened() == False. Without this check the processing loop further
# down would silently do nothing, so fail loudly here instead.
if not cap.isOpened():
    cap.release()
    raise OSError(f"Could not open video source: {video_path!r}")

In [5]:
class Direction:
    """Namespace for classifying a 2-D displacement into a coarse direction."""

    @staticmethod
    def get_direction(dx, dy):
        """Classify a displacement (dx, dy) as a coarse direction label.

        Declared as a static method so it can be called both on the class
        (``Direction.get_direction(dx, dy)``) and on an instance; without the
        decorator an instance call would pass the instance as ``dx`` and fail.

        Args:
            dx: Horizontal displacement; positive values map to "Right".
            dy: Vertical displacement; positive values map to "Down"
                (the sign is flipped before the angle is computed).

        Returns:
            "Static" when both |dx| and |dy| are below 2, otherwise one of
            "Right", "Up", "Down" or "Left" according to the 90-degree sector
            the displacement angle falls into.
        """
        # Treat very small movements as jitter rather than real motion.
        if abs(dx) < 2 and abs(dy) < 2:
            return "Static"
        # Negate dy so that a decreasing y value yields a positive ("Up") angle.
        angle = np.arctan2(-dy, dx) * 180 / np.pi
        if -45 <= angle < 45:
            return "Right"
        elif 45 <= angle < 135:
            return "Up"
        elif -135 <= angle < -45:
            return "Down"
        else:
            # Remaining angles: [135, 180] and [-180, -135).
            return "Left"

In [6]:
def get_direction(dx, dy):
    """Map a displacement (dx, dy) to a coarse direction label.

    Returns "Static" when both components have magnitude below 2. Otherwise
    the displacement angle (computed with dy negated, so decreasing y counts
    as "Up") selects one of "Right", "Up", "Down" or "Left".
    """
    barely_moved = abs(dx) < 2 and abs(dy) < 2
    if barely_moved:
        return "Static"

    heading_deg = np.arctan2(-dy, dx) * 180 / np.pi

    # Half-open angular sectors [low, high) checked in order; anything that
    # matches none of them falls through to "Left".
    sectors = (
        (-45, 45, "Right"),
        (45, 135, "Up"),
        (-135, -45, "Down"),
    )
    for low, high, label in sectors:
        if low <= heading_deg < high:
            return label
    return "Left"

In [7]:
# Last known box centre (cx, cy) for each track id; the tracking loop reads it
# to compute the per-frame displacement and then overwrites it.
prev_centers = {}
# Class ids passed to model.track() as its `classes` filter.
# NOTE(review): presumably COCO ids (0 person, 1 bicycle, 2 car, 3 motorcycle,
# 5 bus, 7 truck) -- confirm against class_names.
classes=[0, 1, 2, 3, 5, 7]

In [None]:
# Loop through the video frames, tracking objects and labelling each track with
# the direction it moved since the previous frame in which it was seen.
# The try/finally guarantees the capture and the display window are released
# even if the loop raises (e.g. a model error or an interrupted kernel).
try:
    while cap.isOpened():
        # Read a frame from the video
        success, frame = cap.read()
        if not success:
            # Break the loop if the end of the video is reached
            break

        # Run YOLO11 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True, classes=classes, show=False)
        # Render the default annotations first; direction text is drawn on top
        annotated_frame = results[0].plot()

        for box in results[0].boxes:
            # Boxes without a track id cannot be matched to a previous centre
            if box.id is None:
                continue

            track_id = int(box.id.item())
            class_id = int(box.cls[0])
            class_name = class_names[class_id]

            # Centre of the bounding box, truncated to integer pixel coordinates
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            cx = int((x1 + x2) / 2)
            cy = int((y1 + y2) / 2)
            center = (cx, cy)

            # A direction can only be derived once the track has a stored centre
            if track_id in prev_centers:
                dx = center[0] - prev_centers[track_id][0]
                dy = center[1] - prev_centers[track_id][1]
                direction = get_direction(dx, dy)

                cv2.putText(annotated_frame,
                            f"{class_name} ID {track_id}: {direction}",
                            (cx, cy - 10),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.6, (0, 255, 0), 2)
            prev_centers[track_id] = center

        # Display the annotated frame
        cv2.imshow("YOLO11 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the video capture object and close the display window
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 4 persons, 7 cars, 200.1ms
Speed: 8.0ms preprocess, 200.1ms inference, 12.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 7 cars, 86.6ms
Speed: 4.2ms preprocess, 86.6ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 7 cars, 79.6ms
Speed: 2.8ms preprocess, 79.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 7 cars, 78.4ms
Speed: 2.7ms preprocess, 78.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 6 cars, 77.6ms
Speed: 2.7ms preprocess, 77.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 6 cars, 78.9ms
Speed: 2.2ms preprocess, 78.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 6 cars, 77.6ms
Speed: 2.4ms preprocess, 77.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 6 cars, 78.6ms
Speed: 2.6ms p