In [1]:
import torch
import numpy as np
import torchvision

from pathlib import Path
import os

from ultralytics import YOLO

from PIL import Image

We will use the YOLO12s model from Ultralytics (2.61 ms inference latency on a T4 GPU, 9.3M parameters).

Object classes:
- 'Car', 'Van', 'Truck',
- 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
- 'Misc' or 'DontCare'

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the YOLO12s checkpoint and move its underlying network onto the chosen device.
model = YOLO("yolo12s.pt")
model.model.to(device)

# Last expression of the cell: displays the network architecture as the cell output.
model.model


DetectionModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C3k2(
      (cv1): Conv(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
 

### Preprocess the Data

We need to remove the entries labelled "Misc" or "DontCare", and also create the YAML file that is fed into the YOLO model.


In [6]:
# Filter the raw label files and distribute them into train/ and val/ sub-folders.
labels_path = Path("data_object_labels")

# exist_ok=True lets the cell be re-run after the split folders have been created.
(labels_path / "train").mkdir(exist_ok=True)
(labels_path / "val").mkdir(exist_ok=True)

# Label files still sitting in the top-level folder, i.e. not yet filtered and moved.
label_files_list = list(labels_path.glob("*.txt"))

# Also count files that a previous (possibly interrupted) run already moved, so that
# `split` keeps the same value on a re-run. The image-splitting cell reuses `split`.
num_already_split = sum(
    1 for subdir in ("train", "val") for _ in (labels_path / subdir).glob("*.txt")
)
num_files = len(label_files_list) + num_already_split

# Files whose numeric stem is below `split` go to train, the rest to val.
# NOTE(review): this is an 80/20 split only if the stems are consecutive integers
# starting at 0 — confirm against the dataset.
split = int(num_files * 0.8)

for label_path in label_files_list:
    split_type = "train" if int(label_path.stem) < split else "val"
    output_path = labels_path / split_type / label_path.name

    # Keep every line that mentions neither of the two ignored labels.
    with open(label_path, 'r', encoding="utf-8") as f:
        new_lines = [
            line for line in f
            if "DontCare" not in line and "Misc" not in line
        ]

    # Write the filtered copy first ('x' refuses to overwrite an existing file) and
    # delete the original only afterwards, so a failed write cannot lose a label file.
    with open(output_path, 'x', encoding="utf-8") as f:
        f.writelines(new_lines)

    label_path.unlink()



Create the val image directory and move the validation images into it


In [None]:
# Move the validation share of the images out of train/ into a new val/ folder.
images_path = Path("data_object_images")

# exist_ok=True keeps the cell re-runnable once the folder has been created.
(images_path / "val").mkdir(exist_ok=True)

image_files_list = list((images_path/"train").glob("*.png"))

for image_path in image_files_list:
    # `split` is the train/val boundary computed in the label-splitting cell;
    # images use the same numeric-stem rule so each image stays with its label file.
    if int(image_path.stem) < split:
        # Already in train/ — nothing to move.
        continue

    destination = images_path / "val" / image_path.name
    image_path.rename(destination)


In [None]:
# Rewrite every label file in place as "<class_idx> <x_center> <y_center> <width> <height>",
# with all four box values divided by the image dimensions.
labels_path = Path("kitti/labels")
images_path = Path("kitti/images")

label_files_list = list(labels_path.glob("*/*.txt"))

object_classes = ['Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram']
class_to_idx = {name: idx for idx, name in enumerate(object_classes)}


def _is_converted_row(fields):
    """Return True if `fields` already looks like an output row of this cell (5 fields, integer class id)."""
    return len(fields) == 5 and fields[0].isdigit()


for label_file in label_files_list:
    # Tokenise on any whitespace and drop blank lines, which carry no annotation.
    with open(label_file, 'r', encoding="utf-8") as f:
        rows = [fields for fields in (line.split() for line in f) if fields]

    # The conversion overwrites its own input, so skip files that are empty or were
    # already converted by an earlier run; this keeps the cell safe to re-run.
    if all(_is_converted_row(fields) for fields in rows):
        continue

    # The image size is the same for every row of a file: read it once per file, and
    # close the image handle as soon as the size is known.
    img_path = images_path / label_file.parent.name / (label_file.stem + ".png")
    with Image.open(img_path) as img:
        img_width, img_height = img.size

    new_lines = []
    for fields in rows:
        # Fields 4..7 are read as the left, top, right and bottom box edges.
        L, T, R, B = (float(value) for value in fields[4:8])

        x_center = (L + R) / 2 / img_width
        y_center = (T + B) / 2 / img_height
        width = (R - L) / img_width
        height = (B - T) / img_height

        # An unknown class name raises KeyError here rather than being silently dropped.
        new_lines.append(f"{class_to_idx[fields[0]]} {x_center} {y_center} {width} {height}\n")

    with open(label_file, 'w', encoding="utf-8") as f:
        f.writelines(new_lines)

