In [1]:
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

In [3]:
# Load YOLOv8 models
model_human = YOLO('yolov8n.pt')  # YOLO for human detection
model_tag = YOLO('best.pt')  # Trained model for Tag detection

# Initialize DeepSORT
tracker = DeepSort(max_age=30, nn_budget=100, override_track_class=None)

# Load your video
video_path = 'sample2.mp4'
cap = cv2.VideoCapture(video_path)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run inference with YOLOv8 for human detection
    results_human = model_human(frame)
    
    # Prepare detections for DeepSORT
    deepsort_detections = []
    for result in results_human:
        # Access bounding box data from 'boxes' attribute
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy.cpu().numpy()[0]
            conf = box.conf.cpu().numpy()
            class_id = int(box.cls)
            bbox = [x1, y1, x2, y2]
            deepsort_detections.append((bbox, conf, class_id))
    
    # Check if any detections are made
    print("Number of detections:", len(deepsort_detections))
    
    # Update DeepSORT with current frame and detections
    tracker_outputs = deepsort.update_tracks(deepsort_detections, frame)
    
    # Check if DeepSORT is returning any tracking data
    print("Number of tracks:", len(tracker_outputs))
    
    # Draw tracking results on the frame
    for track in tracker_outputs:
        track_id = track.track_id  # Get the track ID
        x1, y1, x2, y2 = track.to_tlbr()  # Convert the track's bounding box to top-left, bottom-right format
    
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id}", (int(x1), int(y1-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    
    # Display the frame with tracking
    cv2.imshow('DeepSORT Tracking', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 384x640 35 persons, 2 birds, 38.0ms
Speed: 2.0ms preprocess, 38.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Number of detections: 37
Number of tracks: 37

0: 384x640 34 persons, 1 bird, 6.0ms
Speed: 2.0ms preprocess, 6.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Number of detections: 35
Number of tracks: 35

0: 384x640 33 persons, 3 birds, 7.0ms
Speed: 1.0ms preprocess, 7.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Number of detections: 36
Number of tracks: 36

0: 384x640 36 persons, 2 birds, 6.0ms
Speed: 1.0ms preprocess, 6.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Number of detections: 38


ValueError: shapes (3,1280,3) and (3,1280,38) not aligned: 3 (dim 2) != 1280 (dim 1)