In [1]:
import cv2
import numpy as np
import time
from ultralytics import YOLO, NAS

import sys
sys.path.append(r'C:\Users\chena\Desktop\sort')
from sort import Sort

import os
os.chdir("..")

In [2]:
model = YOLO("runs/detect/train/weights/last.pt")

In [3]:
cap = cv2.VideoCapture(0)
tracker = Sort(max_age=10)

# Check if the webcam is opened
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

prev_time = 0  # Initialize previous time for FPS calculation

font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.8
color = (255, 0, 0)
thickness = 1
frame_n = 0

while True:
    
    # Read frame from webcam
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    # Get the current time
    curr_time = time.time()

    # Calculate FPS
    fps = 1 / (curr_time - prev_time) if prev_time != 0 else 0
    prev_time = curr_time

    # Display FPS on the frame
    fps_text = f"FPS: {fps:.2f}"
    cv2.putText(
        frame, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2
    )

    detections = np.empty((0, 5))
    
    # Run YOLO inference on the frame
    results = model(frame, conf=0.4)  # Set confidence threshold as needed

    frame = results[0].plot()

    for box in results[0].boxes:
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

        cls = int(box.cls[0])
        conf = float(box.conf[0])

        if conf > 0.5:
            frame = cv2.putText(frame, results[0].names[cls], (x1, y1), font, 
                fontScale, color, thickness, cv2.LINE_AA)
            detections = np.vstack((detections, np.array([x1, y1, x2, y2, conf])))

    # Update tracker
    resultTracker = tracker.update(detections)

    for res in resultTracker:
        x1, y1, x2, y2, id = res
        x1, y1, x2, y2, id = int(x1), int(y1), int(x2), int(y2), int(id)
        frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        frame = cv2.putText(frame, str(id), (x1, y2), font,
                   fontScale, color, thickness, cv2.LINE_AA)

    # # Display the annotated frame
    # cv2.imshow("YOLO11 Webcam Detection", annotated_frame)
    cv2.imshow("YOLO11 Webcam Detection", frame)

    # Break loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    frame_n += 1

# Release resources
cap.release()
cv2.destroyAllWindows()


0: 480x640 (no detections), 35.7ms
Speed: 3.0ms preprocess, 35.7ms inference, 48.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 13.0ms
Speed: 2.0ms preprocess, 13.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 12.7ms
Speed: 1.0ms preprocess, 12.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 76.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 8.8ms
Speed: 2.0ms preprocess, 8.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 9.5ms
Speed: 1.0ms preprocess, 9.5ms inference, 25.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 11.0ms
Speed: 2.0ms preprocess, 11.0ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 8.6ms
Speed: 1.0ms preprocess, 8.6ms inference, 2.0ms postprocess per image at sh