In [9]:
import os
import glob

def load_images_and_labels(images_path, labels_path, format):
    images = []
    labels = []
    for image_path in glob.glob(images_path + "/*.{}".format(format)):
        image_name = os.path.basename(image_path)
        label_path = os.path.join(labels_path, image_name.replace(format, "txt"))
        if not os.path.exists(label_path):
            continue
        with open(label_path) as f:
            label = []
            for line in f.read().strip().split("\n"):
                object_type, truncation, occlusion, alpha, x1, y1, x2, y2, h, w, l, x, y, z, ry = line.split(" ")
                annotation = {
                    "object_type": object_type,
                    "truncation": float(truncation),
                    "occlusion": int(occlusion),
                    "alpha": float(alpha),
                    "x1": float(x1),
                    "y1": float(y1),
                    "x2": float(x2),
                    "y2": float(y2),
                    "h": float(h),
                    "w": float(w),
                    "l": float(l),
                    "x": float(x),
                    "y": float(y),
                    "z": float(z),
                    "ry": float(ry)
                }
                label.append(annotation)
            labels.append((label_path, label))
        images.append(image_path)
    return images, labels

kitti_images, kitti_labels = load_images_and_labels("../../yolo-testing/datasets/kitti/images/train", "../../yolo-testing/datasets/kitti/labels/train", "png")

print("Kitti images: ", len(kitti_images))
print("Kitti labels: ", len(kitti_labels))

print(kitti_images[:5])
print(kitti_labels[:5])

assert len(kitti_images) == len(kitti_labels)

Kitti images:  7481
Kitti labels:  7481
['../../yolo-testing/datasets/kitti/images/train/004863.png', '../../yolo-testing/datasets/kitti/images/train/006912.png', '../../yolo-testing/datasets/kitti/images/train/006906.png', '../../yolo-testing/datasets/kitti/images/train/004877.png', '../../yolo-testing/datasets/kitti/images/train/005599.png']
[('../../yolo-testing/datasets/kitti/labels/train/004863.txt', [{'object_type': 'Car', 'truncation': 0.0, 'occlusion': 0, 'alpha': -1.55, 'x1': 572.44, 'y1': 181.56, 'x2': 611.95, 'y2': 219.71, 'h': 1.66, 'w': 1.73, 'l': 3.05, 'x': -0.82, 'y': 2.08, 'z': 33.4, 'ry': -1.57}, {'object_type': 'Tram', 'truncation': 0.0, 'occlusion': 0, 'alpha': -1.48, 'x1': 498.85, 'y1': 149.49, 'x2': 551.4, 'y2': 204.32, 'h': 3.62, 'w': 2.6, 'l': 15.21, 'x': -6.27, 'y': 2.16, 'z': 55.58, 'ry': -1.6}, {'object_type': 'Car', 'truncation': 0.0, 'occlusion': 1, 'alpha': 2.12, 'x1': 56.46, 'y1': 195.89, 'x2': 220.78, 'y2': 260.91, 'h': 1.47, 'w': 1.71, 'l': 4.36, 'x': -1

In [4]:
new_dataset_dir = "../../yolo-testing/datasets/kitti-yolo"

if not os.path.exists(new_dataset_dir):
    os.makedirs(new_dataset_dir)

new_images_dir = os.path.join(new_dataset_dir, "images")
if not os.path.exists(new_images_dir):
    os.makedirs(new_images_dir)

new_labels_dir = os.path.join(new_dataset_dir, "labels")
if not os.path.exists(new_labels_dir):
    os.makedirs(new_labels_dir)

num_train_images = 5241
num_val_images = 2240
assert num_train_images + num_val_images == len(kitti_images)

new_images_train_dir = os.path.join(new_images_dir, "train")
new_images_val_dir = os.path.join(new_images_dir, "val")
if not os.path.exists(new_images_train_dir):
    os.makedirs(new_images_train_dir)
if not os.path.exists(new_images_val_dir):
    os.makedirs(new_images_val_dir)

new_labels_train_dir = os.path.join(new_labels_dir, "train")
new_labels_val_dir = os.path.join(new_labels_dir, "val")
if not os.path.exists(new_labels_train_dir):
    os.makedirs(new_labels_train_dir)
if not os.path.exists(new_labels_val_dir):
    os.makedirs(new_labels_val_dir)


# Schema for kitti labels

example:

```
Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01
```

where

```
<object_type> <truncation> <occlusion> <alpha> <left> <top> <right> <bottom> <height> <width> <length> <x> <y> <z> <rotation_y>
```

so in this case

```
object_type = Pedestrian
truncation = 0.00
occlusion = 0
alpha = -0.20
left = 712.40
top = 143.0
right = 810
bottom = 307
height = 1.89
width = 0.48
length = 1.20
x = 1.84
y = 1.47
z = 8.41
rotation_y = 0.01
```

- **Object Type**: One of the following categories: `Car`, `Van`, `Truck`, `Pedestrian`, `Person_sitting`, `Cyclist`, `Tram`, `Misc`, or `DontCare` (used for ignored objects).  
- **Truncation**: Fraction of the object visible (`0.0` = fully visible, `1.0` = fully outside the frame).  
- **Occlusion**: Integer representing occlusion level (`0` = fully visible, higher values indicate more occlusion).  
- **Alpha**: Observation angle in radians, relative to the camera's positive x-axis.  
- **Bounding Box (`left, top, right, bottom`)**: 2D pixel coordinates for the object's bounding box.  
- **Dimensions (`height, width, length`)**: Object's size in meters.  
- **3D Position (`x, y, z`)**: Centroid location in the camera coordinate system (in meters).  
- **Rotation (`rotation_y`)**: Rotation around the y-axis in camera coordinates (in radians).  

In [None]:
import math 
from PIL import Image
import shutil

class2index = {
    'Car': 0,
    'Van': 1,
    'Truck': 2,
    'Pedestrian': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Tram': 6,
    'Misc': 7,
    'DontCare': 8
}

def ltrb_to_xywh(img_w, img_h, l, t, r, b) -> tuple[float, float, float, float]:
    w = r - l
    h = b - t
    x = l + w / 2
    y = t + h / 2
    x_rel = x / img_w
    y_rel = y / img_h
    w_rel = w / img_w
    h_rel = h / img_h
    # truncate to 4 decimal places
    return round(x_rel, 4), round(y_rel, 4), round(w_rel, 4), round(h_rel, 4)

for label_path, label in kitti_labels[:num_train_images]:
    output_label_path = new_labels_train_dir + "/" + os.path.basename(label_path)
    image_path = label_path.replace("labels", "images").replace("txt", "png")
    # get image dimensions with PIL

    with Image.open(image_path) as img:
        img_w, img_h = img.size

    shutil.copy(image_path, new_images_train_dir)

    with open(output_label_path, "w") as f:
        for annotation in label:
            c = class2index[annotation['object_type']]
            x, y, w, h = ltrb_to_xywh(img_w, img_h, annotation['x1'], annotation['y1'], annotation['x2'], annotation['y2'])
            if c == 8: # DontCare
                distance_to_object = 0
            else:
                distance_to_object = math.sqrt(annotation['x']**2 + annotation['y']**2 + annotation['z']**2)
                # Clamp and normalize distance to object
                if distance_to_object > 150:
                    distance_to_object = 150 # max distance in KITTI dataset
                if distance_to_object < 0:
                    distance_to_object = 0 # min distance in KITTI dataset
                distance_to_object = distance_to_object / 150 # normalize to [0, 1]
                distance_to_object = round(distance_to_object, 4) # truncate to 4 decimal places
            f.write("{} {} {} {} {} {}\n".format(c, x, y, w, h, distance_to_object))

for label_path, label in kitti_labels[num_train_images:]:
    output_label_path = new_labels_val_dir + "/" + os.path.basename(label_path)
    image_path = label_path.replace("labels", "images").replace("txt", "png")
    # get image dimensions with PIL

    with Image.open(image_path) as img:
        img_w, img_h = img.size

    shutil.copy(image_path, new_images_val_dir)
    
    with open(output_label_path, "w") as f:
        for annotation in label:
            c = class2index[annotation['object_type']]
            x, y, w, h = ltrb_to_xywh(img_w, img_h, annotation['x1'], annotation['y1'], annotation['x2'], annotation['y2'])
            if c == 8: # DontCare
                distance_to_object = 0
            else:
                distance_to_object = math.sqrt(annotation['x']**2 + annotation['y']**2 + annotation['z']**2)
                # Clamp and normalize distance to object
                if distance_to_object > 150:
                    distance_to_object = 150 # max distance in KITTI dataset
                if distance_to_object < 0:
                    distance_to_object = 0 # min distance in KITTI dataset
                distance_to_object = distance_to_object / 150 # normalize to [0, 1]
                distance_to_object = round(distance_to_object, 4) # truncate to 4 decimal places
            f.write("{} {} {} {} {} {}\n".format(c, x, y, w, h, distance_to_object))