In [1]:
!pip install -q git+https://github.com/THU-MIG/yolov10.git
!mkdir -p weights
!wget -P weights -q https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt
!wget -P weights -q https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt
!wget -P weights -q https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt
!wget -P weights -q https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt
!wget -P weights -q https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt
!wget -P weights -q https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt
!ls -lh weights

total 408M
-rw-rw-r-- 1 wins057 wins057  80M May 23  2024 yolov10b.pt
-rw-rw-r-- 1 wins057 wins057 100M May 23  2024 yolov10l.pt
-rw-rw-r-- 1 wins057 wins057  64M May 23  2024 yolov10m.pt
-rw-rw-r-- 1 wins057 wins057  11M May 23  2024 yolov10n.pt
-rw-rw-r-- 1 wins057 wins057  32M May 23  2024 yolov10s.pt
-rw-rw-r-- 1 wins057 wins057 123M May 23  2024 yolov10x.pt


In [2]:
!pip -q install boxmot

In [1]:
import numpy as np
from pathlib import Path
import cv2
from ultralytics import YOLO
from boxmot import BotSort

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_video_writer(video_cap, output_filename):
    # Grab the width, height, and fps of the frames in the video stream.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_cap.get(cv2.CAP_PROP_FPS))

    # Initialize the FourCC and a video writer object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    writer = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))

    return writer

In [4]:
# Initialize the tracker
reid_weights_path = Path('weights_ReID/pt/Market1501_clipreid_12x12sie_ViT-B-16_60.pt')
tracker = BotSort(
    reid_weights = Path('osnet_x0_25_msmt17.pt'),  # which ReID model to use
    device = 'cuda:0',
    half = False
)

# Initialize YOLO model
model = YOLO('/home/wins057/Documents/Projects/Tracking/boxmot/weights/yolov10x.pt')

# Open the input video
# input_video_path = 'videos/05_1F_2024_9_2_8mins.mp4'
input_video_path = '/home/wins057/Documents/Projects/Tracking/boxmot/videos/32_1F_2024_9_2_part2.mp4'
vid = cv2.VideoCapture(input_video_path)

if not vid.isOpened():
    print("Error: Could not open input video.")
    exit()

# Define the output video path
output_video_path = '/home/wins057/Documents/Projects/Tracking/boxmot/videos/32_1F_2024_9_2_part2_tracking.mp4'

# Create the video writer
out = create_video_writer(vid, output_video_path)

# Confidence threshold for filtering low-confidence detections
CONFIDENCE_THRESHOLD = 0.5

images = []
while True:
    ret, im = vid.read()
    if not ret:
        print("End of video reached or error reading frame.")
        break

    try:
      # Run the YOLO model on the frame - detect person only
      results = model(im, classes=[0], verbose=False)

      if len(results) >= 1:
        # Convert the detections to the required format: N X (x, y, x, y, conf, cls)
        dets = []
        for result in results:
          for boxes in result.boxes:
            conf = boxes.conf.item() # Get the confidence score
            if conf >= CONFIDENCE_THRESHOLD: # Filter based on confidence threshold
              # Extract bounding box coordinates
              x1, y1, x2, y2 = boxes.xyxy[0][0].item(), boxes.xyxy[0][1].item(), boxes.xyxy[0][2].item(), boxes.xyxy[0][3].item()
              cls = boxes.cls.item()
              dets.append([x1, y1, x2, y2, conf, int(cls)])
        dets = np.array(dets)

        # Check if there are any detections
        if dets.size > 0:
            # Update the tracker with the detections
            tracker.update(dets, im) # --> M X (x, y, x, y, id, conf, cls, ind)
        # If no detections, make prediction ahead
        else:
            dets = np.empty((0, 6))  # empty N X (x, y, x, y, conf, cls)
            tracker.update(dets, im) # --> M X (x, y, x, y, id, conf, cls, ind)

        # Plot results on the frame
        tracker.plot_results(im, show_trajectories=True)

      # Write the frame to the output video
      images.append(im)
      out.write(im)

    except Exception as e:
        print(f"An error occurred: {e}")
        break

vid.release()
out.release()
cv2.destroyAllWindows()

print(f"Tracking video saved to {output_video_path}")

[32m2024-12-25 13:30:23.191[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v11.0.6 🚀 Python-3.12.4 torch-2.2.2+cu121
CUDA:0 (NVIDIA GeForce RTX 3090, 24145MiB)[0m
[32m2024-12-25 13:30:23.212[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m183[0m - [32m[1mLoaded pretrained weights from osnet_x0_25_msmt17.pt[0m
OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


----- []
----- [[     1718.3      61.357      1746.4      172.85           1     0.71241           0           0]]
----- [[     1703.8      61.286      1742.9      171.62           1      0.7913           0           0]]
----- [[     1692.1      61.807      1743.7      173.45           1     0.82965           0           0]]
----- [[     1685.5      62.112      1742.5      175.81           1     0.82632           0           0]]
----- [[     1683.3       60.66      1733.1      172.56           1     0.82478           0           0]]
----- [[     1682.6      59.704      1718.9      170.91           1     0.79399           0           0]]
----- [[       1674      58.779      1707.2      167.28           1     0.82931           0           0]]
----- [[       1662      58.575      1702.6      167.55           1     0.88145           0           0]]
----- [[     1650.8       59.31      1700.4      168.41           1     0.86645           0           0]]
----- [[     1647.2      58.972      