In [1]:
import os
import re
import numpy as np
import cv2
from collections import defaultdict
from random import randrange
from ultralytics import YOLO
from ultralytics.utils.torch_utils import select_device
from hybrid_sort import Hybrid_Sort



In [2]:
# Utility functions
def create_output_directory(output_path):
    try:
        os.makedirs(output_path, exist_ok=True)
        print(f"Directory created at: {output_path}")
    except OSError as e:
        print(f"Error creating directory: {e}")

def list_files(directory):
    try:
        return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    except Exception as e:
        print(f"Error listing files in directory {directory}: {e}")
        return []

def get_sorted_file_paths(directory):
    files = list_files(directory)
    return [os.path.join(directory, f"frame_{i:05d}.png") for i in range(1, len(files) + 1)]

def write_mot_format(data, filename):
    try:
        with open(filename, "w") as f:
            for entry in data:
                line = "{},{},{},{},{},{},{},{},{},{}\n".format(*entry)
                f.write(line)
        print(f"MOT data written to {filename}")
    except Exception as e:
        print(f"Error writing to file {filename}: {e}")

# Core functions
def initialize_model(model_path):
    model = YOLO(model_path, 'detect')
    device = select_device('cpu')
    model.to(device)
    return model

def process_frame(frame, model, tracker, track_history, frame_id, colors):
    prediction = model.predict(frame)
    prediction_results = prediction[0].boxes.cpu().numpy()
    if len(prediction_results) == 0:
        pred_concatenate = np.empty((0, 5))
    else:
        pred_conf = prediction_results.conf.reshape(prediction_results.conf.shape[0], 1)
        pred_concatenate = np.concatenate((prediction_results.xyxy, pred_conf), axis=1)

    tracks = tracker.update(pred_concatenate, img_info=[1088, 1920], img_size=[1088, 1920])

    if len(tracks) == 0:
        return

    bbox_list = tracks[:, 0:4] 
    track_id_list = tracks[:, 4]

    for track_id, bbox in zip(track_id_list, bbox_list):
        if track_id not in colors:
            colors[track_id] = (randrange(255), randrange(255), randrange(255))

        x0, y0, x1, y1 = map(int, bbox)
        track_history.append((frame_id, track_id, x0, y0, x1 - x0, y1 - y0, -1, -1, -1, -1))

def process_video(dir_images, model, tracker, output_file):
    files_sorted = get_sorted_file_paths(dir_images)
    track_history = []
    colors = {}

    for file in files_sorted:
        match = re.search(r'frame_\d+\.png', file)
        if not match:
            continue

        frame_id = int(match.group(0).split("_")[1].split(".")[0])
        frame = cv2.imread(file)
        process_frame(frame, model, tracker, track_history, frame_id, colors)

    write_mot_format(track_history, output_file)

In [12]:
# Global constants
ALGORITHM = "hybridsort"
MODEL_NAME = "yolo11n_DJI_0010_V_2@fine-tuning" 
#"yolo11x_DJI_0008_V_and_0010_V_2@fine-tuning" 
# yolo11n_DJI_0010_V_2@fine-tuning 
# yolo11n_DJI_0008_V@fine-tuning
OUTPUT_PATH_MOT_FILES = f'../../output/mot_detections/{ALGORITHM}'
FRAME_RATE = 25
FRAME_WIDTH = 1920
FRAME_HEIGHT = 1088

create_output_directory(OUTPUT_PATH_MOT_FILES)

Directory created at: ../../output/mot_detections/hybridsort


In [13]:
import sys
from argparse import ArgumentParser, ArgumentTypeError

def str2bool(value):
    """
    Converts string input to boolean.
    """
    if isinstance(value, bool):
        return value
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise ArgumentTypeError(f"Boolean value expected, got {value}.")

def parse_args():
    parser = ArgumentParser(description="Hybrid-SORT Configuration Arguments")

    # Tracking params for Hybrid-SORT
    parser.add_argument("--ckpt", type=str, default="pretrained/bytetrack_dance_model.pth.tar",
                        help="Path to the checkpoint file")
    parser.add_argument("--use_byte", type=str2bool, default=True, help="Use ByteTrack for tracking")
    parser.add_argument("--dataset", type=str, default="dancetrack", help="Dataset name")
    parser.add_argument("--inertia", type=float, default=0.05, help="Inertia parameter for tracking")
    parser.add_argument("--iou_thresh", type=float, default=0.15, help="IoU threshold for tracking association")
    parser.add_argument("--asso", type=str, default="Height_Modulated_IoU", help="Association method")
    parser.add_argument("--TCM_first_step", type=str2bool, default=True, help="Use TCM in the first step")
    parser.add_argument("--TCM_byte_step", type=str2bool, default=True, help="Use TCM in the ByteTrack step")
    parser.add_argument("--TCM_first_step_weight", type=float, default=1.5, help="Weight for TCM first step")
    parser.add_argument("--TCM_byte_step_weight", type=float, default=1.0, help="Weight for TCM ByteTrack step")
    parser.add_argument("--hybrid_sort_with_reid", type=str2bool, default=False, help="Use re-ID in Hybrid-SORT")
    parser.add_argument("--track_thresh", type=float, default=0.4, help="Use re-ID in Hybrid-SORT")

    args = parser.parse_args()
    args_dict = vars(args)
    return args

# Simulate command-line arguments
sys.argv = [
    "notebook",  # Fake script name
    "--ckpt", "new_checkpoint.pth.tar",
    "--use_byte", "False",
    "--dataset", "new_dataset",
    "--inertia", "0.1",
    "--iou_thresh", "0.2",
    "--TCM_first_step", "True",
]

# Parse arguments
args = parse_args()
print("Arguments as Dictionary:")
print(args)


Arguments as Dictionary:
Namespace(TCM_byte_step=True, TCM_byte_step_weight=1.0, TCM_first_step=True, TCM_first_step_weight=1.5, asso='Height_Modulated_IoU', ckpt='new_checkpoint.pth.tar', dataset='new_dataset', hybrid_sort_with_reid=False, inertia=0.1, iou_thresh=0.2, track_thresh=0.4, use_byte=False)


In [14]:
name_video = "DJI_20240308110013_0004_V_3"
# DJI_20240308110228_0006_V_2 DJI_20240308111117_0010_V_1 DJI_20240308110115_0005_V 
# DJI_20240308110013_0004_V_1 DJI_20240308110013_0004_V_3
model_path = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/{MODEL_NAME}/best.pt"
dir_images = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/dataset/{name_video}/valid/images"
output_dir = os.path.join(OUTPUT_PATH_MOT_FILES, f"{MODEL_NAME}@{name_video}")
output_file = os.path.join(output_dir, f"{name_video}.txt")

create_output_directory(output_dir)

model = initialize_model(model_path)
tracker = Hybrid_Sort(args, det_thresh=0.20)

process_video(dir_images, model, tracker, output_file)

Directory created at: ../../output/mot_detections/hybridsort\yolo11n_DJI_0010_V_2@fine-tuning@DJI_20240308110013_0004_V_3
Ultralytics 8.3.11  Python-3.8.18 torch-2.1.0 CPU (12th Gen Intel Core(TM) i5-1235U)

0: 384x640 (no detections), 190.6ms
Speed: 44.4ms preprocess, 190.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 95.4ms
Speed: 0.0ms preprocess, 95.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 pineapples, 93.3ms
Speed: 0.0ms preprocess, 93.3ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 pineapple, 95.8ms
Speed: 0.0ms preprocess, 95.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 pineapples, 136.8ms
Speed: 3.6ms preprocess, 136.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 pineapples, 128.8ms
Speed: 3.5ms preprocess, 128.8ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

In [23]:
name_video = "DJI_20240308110228_0006_V_2"
# DJI_20240308110228_0006_V_2 DJI_20240308111117_0010_V_1 DJI_20240308110115_0005_V 
# DJI_20240308110013_0004_V_1 DJI_20240308110013_0004_V_3
model_path = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/{MODEL_NAME}/best.pt"
dir_images = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/dataset/{name_video}/valid/images"
output_dir = os.path.join(OUTPUT_PATH_MOT_FILES, f"{MODEL_NAME}@{name_video}")
output_file = os.path.join(output_dir, f"{name_video}.txt")

create_output_directory(output_dir)

model = initialize_model(model_path)
tracker = Hybrid_Sort(args, det_thresh=0.20)

process_video(dir_images, model, tracker, output_file)

Directory created at: ../../output/mot_detections/hybridsort\yolo11n_DJI_0008_V@fine-tuning@DJI_20240308110228_0006_V_2
Ultralytics 8.3.11  Python-3.8.18 torch-2.1.0 CPU (12th Gen Intel Core(TM) i5-1235U)

0: 384x640 (no detections), 107.6ms
Speed: 2.5ms preprocess, 107.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 144.0ms
Speed: 1.9ms preprocess, 144.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 103.0ms
Speed: 3.0ms preprocess, 103.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 123.2ms
Speed: 2.0ms preprocess, 123.2ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 pineapples, 175.9ms
Speed: 5.0ms preprocess, 175.9ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 154.3ms
Speed: 3.0ms preprocess, 154.3ms inference, 1.0ms postprocess per image at shape (1, 3

In [24]:
name_video = "DJI_20240308111117_0010_V_1"
# DJI_20240308110228_0006_V_2 DJI_20240308111117_0010_V_1 DJI_20240308110115_0005_V 
# DJI_20240308110013_0004_V_1 DJI_20240308110013_0004_V_3
model_path = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/{MODEL_NAME}/best.pt"
dir_images = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/dataset/{name_video}/valid/images"
output_dir = os.path.join(OUTPUT_PATH_MOT_FILES, f"{MODEL_NAME}@{name_video}")
output_file = os.path.join(output_dir, f"{name_video}.txt")

create_output_directory(output_dir)

model = initialize_model(model_path)
tracker = Hybrid_Sort(args, det_thresh=0.20)

process_video(dir_images, model, tracker, output_file)

Directory created at: ../../output/mot_detections/hybridsort\yolo11n_DJI_0008_V@fine-tuning@DJI_20240308111117_0010_V_1
Ultralytics 8.3.11  Python-3.8.18 torch-2.1.0 CPU (12th Gen Intel Core(TM) i5-1235U)

0: 384x640 9 pineapples, 215.1ms
Speed: 11.3ms preprocess, 215.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 pineapples, 151.9ms
Speed: 3.0ms preprocess, 151.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 pineapples, 157.8ms
Speed: 3.0ms preprocess, 157.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 pineapples, 129.4ms
Speed: 4.0ms preprocess, 129.4ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 pineapples, 131.0ms
Speed: 2.0ms preprocess, 131.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 pineapples, 137.0ms
Speed: 3.5ms preprocess, 137.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)



In [25]:
name_video = "DJI_20240308110115_0005_V"
# DJI_20240308110228_0006_V_2 DJI_20240308111117_0010_V_1 DJI_20240308110115_0005_V 
# DJI_20240308110013_0004_V_1 DJI_20240308110013_0004_V_3
model_path = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/{MODEL_NAME}/best.pt"
dir_images = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/dataset/{name_video}/valid/images"
output_dir = os.path.join(OUTPUT_PATH_MOT_FILES, f"{MODEL_NAME}@{name_video}")
output_file = os.path.join(output_dir, f"{name_video}.txt")

create_output_directory(output_dir)

model = initialize_model(model_path)
tracker = Hybrid_Sort(args, det_thresh=0.20)

process_video(dir_images, model, tracker, output_file)

Directory created at: ../../output/mot_detections/hybridsort\yolo11n_DJI_0008_V@fine-tuning@DJI_20240308110115_0005_V
Ultralytics 8.3.11  Python-3.8.18 torch-2.1.0 CPU (12th Gen Intel Core(TM) i5-1235U)

0: 384x640 1 pineapple, 213.8ms
Speed: 5.2ms preprocess, 213.8ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 pineapples, 127.5ms
Speed: 2.5ms preprocess, 127.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 pineapples, 168.1ms
Speed: 3.0ms preprocess, 168.1ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 pineapples, 138.9ms
Speed: 3.0ms preprocess, 138.9ms inference, 6.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 pineapple, 129.6ms
Speed: 2.0ms preprocess, 129.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 pineapple, 134.7ms
Speed: 3.0ms preprocess, 134.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x

In [26]:
name_video = "DJI_20240308110013_0004_V_1"
# DJI_20240308110228_0006_V_2 DJI_20240308111117_0010_V_1 DJI_20240308110115_0005_V 
# DJI_20240308110013_0004_V_1 DJI_20240308110013_0004_V_3
model_path = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/weights/{MODEL_NAME}/best.pt"
dir_images = f"C:/Users/dnnxl/Documents/GitHub/drone-sort/dataset/{name_video}/valid/images"
output_dir = os.path.join(OUTPUT_PATH_MOT_FILES, f"{MODEL_NAME}@{name_video}")
output_file = os.path.join(output_dir, f"{name_video}.txt")

create_output_directory(output_dir)

model = initialize_model(model_path)
tracker = Hybrid_Sort(args, det_thresh=0.20)

process_video(dir_images, model, tracker, output_file)

Directory created at: ../../output/mot_detections/hybridsort\yolo11n_DJI_0008_V@fine-tuning@DJI_20240308110013_0004_V_1
Ultralytics 8.3.11  Python-3.8.18 torch-2.1.0 CPU (12th Gen Intel Core(TM) i5-1235U)

0: 384x640 8 pineapples, 136.1ms
Speed: 8.0ms preprocess, 136.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 pineapples, 149.2ms
Speed: 2.0ms preprocess, 149.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 pineapples, 151.4ms
Speed: 4.0ms preprocess, 151.4ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 pineapples, 167.2ms
Speed: 3.3ms preprocess, 167.2ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 pineapples, 154.5ms
Speed: 3.0ms preprocess, 154.5ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 pineapples, 144.1ms
Speed: 3.0ms preprocess, 144.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: