In [1]:
import cv2
import os
import random
import numpy as np
from ultralytics import YOLO

from deep_sort.deep_sort.tracker import Tracker as DeepSortTracker
from deep_sort.tools import generate_detections as gdet
from deep_sort.deep_sort import nn_matching
from deep_sort.deep_sort.detection import Detection

print("Imports done.")


Instructions for updating:
non-resource variables are not supported in the long term
Imports done.


In [2]:
from ultralytics import YOLO
import os
import random
import shutil
import xml.etree.ElementTree as ET

# --- Dataset configuration ---------------------------------------------------
IMAGES_DIR = "train_data_raw/train_images"     # folder the source images are copied from
XML_FILE = "train_data_raw/annotations.xml"    # annotation XML parsed with ElementTree
OUTPUT_DIR = "train_data"                      # root of the generated images/ and labels/ tree
TRAIN_RATIO = 0.8                              # fraction of images assigned to the train split

# Label name -> class index. The position in the tuple below IS the class index,
# so append new labels at the end to keep existing indices stable.
CLASS_MAP = {
    label_name: class_index
    for class_index, label_name in enumerate(
        ("car", "bicycle", "bus", "person", "motorbike")
    )
}

In [3]:
def convert_to_yolo_bbox(xtl, ytl, xbr, ybr, img_width, img_height):
    """
    Turn a corner-style box into a YOLO-style box.

    Args:
        xtl, ytl: top-left corner of the box, in pixels.
        xbr, ybr: bottom-right corner of the box, in pixels.
        img_width, img_height: image size used to normalise the values.

    Returns:
        Tuple (x_center, y_center, width, height), each divided by the
        corresponding image dimension.
    """
    # Box centre and extent, still in pixel units.
    mid_x = (xtl + xbr) / 2.0
    mid_y = (ytl + ybr) / 2.0
    box_w = xbr - xtl
    box_h = ybr - ytl

    # Normalise horizontal values by the width and vertical ones by the height.
    return (
        mid_x / img_width,
        mid_y / img_height,
        box_w / img_width,
        box_h / img_height,
    )

# Lay out the dataset tree: <OUTPUT_DIR>/{images,labels}/{train,val}
train_img_dir, val_img_dir = (
    os.path.join(OUTPUT_DIR, "images", split) for split in ("train", "val")
)
train_lbl_dir, val_lbl_dir = (
    os.path.join(OUTPUT_DIR, "labels", split) for split in ("train", "val")
)

# exist_ok=True keeps this cell safe to re-run when the folders already exist.
for _dataset_dir in (train_img_dir, val_img_dir, train_lbl_dir, val_lbl_dir):
    os.makedirs(_dataset_dir, exist_ok=True)

In [4]:
# Load the annotation XML and collect one <image> element per annotated image.
tree = ET.parse(XML_FILE)
root = tree.getroot()

image_elements = root.findall('image')
print("Number of images in XML:", len(image_elements))

# Shuffle with a fixed seed so the train/val split is reproducible.
# An unseeded shuffle assigns images differently on every run; because files
# written by earlier runs stay in the output folders, the same image can then
# end up in both the train and the val folder.
# A private Random instance is used so the global `random` state is untouched.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(image_elements)

# First TRAIN_RATIO share of the shuffled list goes to train, the rest to val.
train_count = int(len(image_elements) * TRAIN_RATIO)
train_elements = image_elements[:train_count]
val_elements   = image_elements[train_count:]
print(f"Train images: {len(train_elements)}, Val images: {len(val_elements)}")

Number of images in XML: 33
Train images: 26, Val images: 7


In [5]:
def process_images(image_subset, subset_name):
    """
    Copy each image of a split into its folder and write the matching YOLO label file.

    Args:
        image_subset: iterable of <image> XML elements. Each element must have
            'name', 'width' and 'height' attributes and may contain <box>
            children with 'label', 'xtl', 'ytl', 'xbr' and 'ybr' attributes.
        subset_name: "train" selects the train folders; any other value
            selects the val folders.

    Behaviour:
        - Images missing from IMAGES_DIR are reported and skipped (no label file).
        - Boxes whose label is not in CLASS_MAP are reported and skipped.
        - A label file is written for every copied image, even if it has no boxes.
        - If the image name contains sub-folders, they are created under both
          the image and the label output folder.
    """
    if subset_name == "train":
        img_out_dir = train_img_dir
        lbl_out_dir = train_lbl_dir
    else:
        img_out_dir = val_img_dir
        lbl_out_dir = val_lbl_dir

    for img_elem in image_subset:
        file_name = img_elem.attrib['name']
        width = float(img_elem.attrib['width'])
        height = float(img_elem.attrib['height'])

        src_img_path = os.path.join(IMAGES_DIR, file_name)
        dst_img_path = os.path.join(img_out_dir, file_name)

        if not os.path.exists(src_img_path):
            print(f"Warning: {src_img_path} not found. Skipping.")
            continue

        # Label file shares the image's relative path, with a .txt extension.
        txt_file_name = os.path.splitext(file_name)[0] + ".txt"
        txt_out_path = os.path.join(lbl_out_dir, txt_file_name)

        # The image name may contain a sub-folder (e.g. "cam1/frame_001.jpg").
        # Create the parent folders first, otherwise the copy and the label
        # write below fail with FileNotFoundError.
        os.makedirs(os.path.dirname(dst_img_path) or ".", exist_ok=True)
        os.makedirs(os.path.dirname(txt_out_path) or ".", exist_ok=True)

        # Copy image (copy2 also preserves file metadata)
        shutil.copy2(src_img_path, dst_img_path)

        # Prepare label lines, one per box with a known label
        label_lines = []
        for b in img_elem.findall('box'):
            label_str = b.attrib['label']
            xtl = float(b.attrib['xtl'])
            ytl = float(b.attrib['ytl'])
            xbr = float(b.attrib['xbr'])
            ybr = float(b.attrib['ybr'])

            # Convert label to class index
            if label_str not in CLASS_MAP:
                print(f"Warning: Label '{label_str}' not in CLASS_MAP. Skipping.")
                continue
            class_idx = CLASS_MAP[label_str]

            x_center, y_center, w, h = convert_to_yolo_bbox(
                xtl, ytl, xbr, ybr, width, height
            )
            label_lines.append(
                f"{class_idx} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
            )

        with open(txt_out_path, "w") as f:
            f.writelines(line + "\n" for line in label_lines)

# Materialise both splits on disk: train first, then val.
for _split_elements, _split_name in (
    (train_elements, "train"),
    (val_elements, "val"),
):
    process_images(_split_elements, _split_name)

print("Finished processing images and labels.")

Finished processing images and labels.


In [None]:
# Write the dataset description passed to model.train(): absolute paths of the
# two image folders plus an "index: name" table ordered by class index.
sorted_classes = sorted(CLASS_MAP.items(), key=lambda x: x[1])
data_yaml_path = os.path.join(OUTPUT_DIR, "dataset.yaml")
with open(data_yaml_path, "w") as f:
    f.write(f"train: {os.path.abspath(train_img_dir)}\n")
    f.write(f"val: {os.path.abspath(val_img_dir)}\n")
    f.write("names:\n")
    for k, v in sorted_classes:
        f.write(f"  {v}: {k}\n")

print(f"dataset.yaml created at {data_yaml_path}")
# Not used in this cell; kept in case later cells rely on it.
from pathlib import Path

# Single source of truth for the run name, so the training call and the
# final message cannot drift apart.
RUN_NAME = "masa_model"

model = YOLO('yolov8n.pt')
results = model.train(
    data=data_yaml_path,
    epochs=1000,
    imgsz=640,
    batch=32,
    name=RUN_NAME
)

# The run folder gets a numeric suffix when the name already exists
# (the training log reports e.g. runs\detect\masa_model5), hence the wildcard.
print(f"Training completed. Check the latest 'runs/detect/{RUN_NAME}*' folder for logs and weights.")

dataset.yaml created at train_data\dataset.yaml
New https://pypi.org/project/ultralytics/8.3.146 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.145  Python-3.12.2 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=train_data\dataset.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=1000, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=trained_models\yolov8n.pt, momentum

100%|██████████| 5.35M/5.35M [00:00<00:00, 30.0MB/s]


[34m[1mAMP: [0mchecks passed 
[34m[1mtrain: [0mFast image access  (ping: 0.10.0 ms, read: 1602.8771.2 MB/s, size: 750.9 KB)


[34m[1mtrain: [0mScanning C:\Users\marco\Dropbox\Code\PYTHON\MASATracker\train_data\labels\train.cache... 33 images, 1 backgrounds, 0 corrupt: 100%|██████████| 33/33 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 500.227.5 MB/s, size: 765.0 KB)


[34m[1mval: [0mScanning C:\Users\marco\Dropbox\Code\PYTHON\MASATracker\train_data\labels\val.cache... 23 images, 1 backgrounds, 0 corrupt: 100%|██████████| 23/23 [00:00<?, ?it/s]


Plotting labels to runs\detect\masa_model5\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\detect\masa_model5[0m
Starting training for 1000 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/2 [00:05<?, ?it/s]


KeyboardInterrupt: 