In [4]:
from boxmot import (OCSORT, BoTSORT, BYTETracker, DeepOCSORT, StrongSORT,
                    create_tracker, get_tracker_config)
from pathlib import Path
import cv2
import sys
import numpy as np
import datetime
from ultralytics import YOLO

In [5]:
def create_video_writer(video_cap, output_filename, default_fps=30.0):
    """Create a ``cv2.VideoWriter`` matching the geometry of an open capture.

    Args:
        video_cap: An opened ``cv2.VideoCapture`` whose frame width, height
            and frame rate are queried.
        output_filename: Path of the video file to write.
        default_fps: Frame rate used when the capture does not report a
            positive frame rate (e.g. 0 or NaN).

    Returns:
        A ``cv2.VideoWriter`` using the ``mp4v`` FourCC, sized to the
        capture's frames.
    """
    # Frame dimensions must be integers for the writer's frame size.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # written video play at a different speed than the source.
    fps = video_cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:  # also catches NaN, since NaN > 0 is False
        fps = default_fps

    # Initialise the FourCC and the video writer object.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_filename, fourcc, fps,
                             (frame_width, frame_height))

    return writer

In [24]:
# --- Drawing style -----------------------------------------------------------
color = (0, 0, 255)  # BGR
thickness, fontscale = 1, 0.5

# --- Inference backend -------------------------------------------------------
device = "mps:0"  # other options: cuda:0 , cpu
fp16 = True  # set to True when a GPU is available

# --- Detector checkpoint and input video -------------------------------------
source = "002.mp4"
latest_train_dir = 'train_2023.12.03_19.33.40_n_im640_ep120_baNone_seNone'
weights = 'best'
model = YOLO(f"runs/detect/{latest_train_dir}/weights/{weights}.pt")

In [25]:
# Detect and track objects in `source` frame by frame, draw one labelled box
# per track, and write the annotated frames to "DeepOCSORT.mp4".

# Alternative tracker, kept for comparison runs:
# tracker = BoTSORT(
#     model_weights=Path('osnet_x0_25_msmt17.pt'),
#     device=device,
#     fp16=fp16,
# )
class_dict = {
    # class id: (label, color in BGR)
    0: ('skater', (0, 30, 255)),
    1: ('goalie', (0, 255, 50)),
    2: ('referee', (255, 180, 0)),
}
tracker = DeepOCSORT(
    model_weights=Path('osnet_x0_25_msmt17.pt'),  # which ReID model to use
    device=device,
    fp16=fp16,
)

vid = cv2.VideoCapture(source)
if not vid.isOpened():
    vid.release()
    raise IOError(f"Could not open video source: {source}")
writer = create_video_writer(vid, "DeepOCSORT.mp4")

try:
    while True:
        ret, im = vid.read()
        # Stop before running the detector: at end of stream `im` is None.
        if not ret:
            break

        detections = model.predict(im)[0]

        # Detections as a 2-D float array, one row per box.
        # NOTE(review): rows are presumably (x, y, x, y, conf, cls) - confirm
        # against the Ultralytics Boxes.data documentation.
        dets = np.array(detections.boxes.data.tolist(), dtype=float)
        if dets.ndim != 2:
            # No detections: hand the tracker an empty 2-D array so it can
            # still advance its internal state for this frame.
            dets = np.empty((0, 6))

        ts = tracker.update(dets, im)  # rows indexed below as (x, y, x, y, id, conf, cls)

        # Only slice the tracker output when it actually holds rows; an empty
        # result may not be 2-D and would fail on column indexing.
        if ts.ndim == 2 and ts.shape[0] != 0:
            xyxys = ts[:, 0:4].astype('int')  # float64 to int
            ids = ts[:, 4].astype('int')  # float64 to int
            confs = ts[:, 5]
            clss = ts[:, 6].astype('int')  # integer keys for class_dict

            # Draw each box with its associated id, class and confidence.
            for xyxy, track_id, conf, cls in zip(xyxys, ids, confs, clss):
                x1, y1, x2, y2 = (int(v) for v in xyxy)
                label, box_color = class_dict[int(cls)]
                im = cv2.rectangle(
                    im,
                    (x1, y1),
                    (x2, y2),
                    box_color,
                    thickness
                )
                cv2.putText(
                    im,
                    f'{label}, confidence: {round(conf, 1)}, id:{track_id}',
                    (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    fontscale,
                    box_color,
                    thickness
                )

        # Write the (possibly annotated) frame to the output video.
        writer.write(im)
finally:
    # Release the capture and flush the writer even if a frame fails.
    vid.release()
    writer.release()

[32m2023-12-05 13:17:28.578[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m207[0m - [32m[1mSuccessfully loaded pretrained weights from "osnet_x0_25_msmt17.pt"[0m
OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'

0: 384x640 6 skaters, 3 referees, 92.7ms
Speed: 2.0ms preprocess, 92.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 6 skaters, 3 referees, 88.1ms
Speed: 0.3ms preprocess, 88.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 5 skaters, 4 referees, 80.9ms
Speed: 0.6ms preprocess, 80.9ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 6 skaters, 3 referees, 91.5ms
Speed: 0.3ms preprocess, 91.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 384x640 6 skaters, 3 referees, 94.1ms
Sp