# This notebook assumes you have exdark downloaded in ../../yolo-testing/datasets/exDark

it should look like
* imageclasslist.txt
* images
* * Bicycle
* * * 2015_00001.png
* labels
* * Bicycle
* * * 2015_00001.png.txt

The imageclasslist.txt has columns
Name | Class | Light | In/Out | Train/Val/Test
2015_00001.png 1 2 1 1
2015_00002.png 1 6 2 1
2015_00003.png 1 5 2 1

- Name is filename
- Class is Bicycle(1), Boat(2), Bottle(3), Bus(4), Car(5), Cat(6), Chair(7), Cup(8), Dog(9), Motorbike(10), People(11), Table(12)
- Light is Low(1), Ambient(2), Object(3), Single(4), Weak(5), Strong(6), Screen(7), Window(8), Shadow(9), Twilight(10)
- In/Out is Indoor(1), Outdoor(2)
- Train/val/Test is Training(1), Validation(2), Testing(3)

In [76]:
id2cls = {
    '1': "Bicycle",
    '2': "Boat",
    '3': "Bottle",
    '4': "Bus",
    '5': "Car",
    '6': "Cat",
    '7': "Chair",
    '8': "Cup",
    '9': "Dog",
    '10': "Motorbike",
    '11': "People",
    '12': "Table"
}

id2light = {
    '1': "low",
    '2': "ambient",
    '3': "object",
    '4': "single",
    '5': "weak",
    '6': "strong",
    '7': "screen",
    '8': "window",
    '9': "shadow",
    '10': "twilight"
}

id2inout = {
    '1': "indoor",
    '2': "outdoor"
}

id2testset = {
    '1': "train",
    '2': "val",
    '3': "test"
}

In [77]:
import os

dataset_path = "../../yolo-testing/datasets/exDark"
output_path = "../../yolo-testing/datasets/exDark-yolo"

if not os.path.exists(output_path):
    os.makedirs(output_path)
    print("Created directory: ", output_path)
else:
    print("Directory already exists: ", output_path)



Directory already exists:  ../../yolo-testing/datasets/exDark-yolo


In [78]:
# Parse the imageclasslist.txt file

with open(os.path.join(dataset_path, "imageclasslist.txt"), "r") as f:
    lines = f.readlines()

print("Found info for {} images".format(len(lines) - 1))


Found info for 7363 images


In [79]:
imageclasslist = []
for i, line in enumerate(lines):
    if i == 0:
        continue
    line = line.strip().split(" ")
    imageclasslist.append({
        "filename": line[0],
        "class": line[1],
        "light": line[2],
        "in_out": line[3],
        "test_set": line[4]
    })

def print_info(imageclass: dict):
    print("Filename: ", imageclass["filename"])
    print("Class: ", id2cls[imageclass["class"]])
    print("Light: ", id2light[imageclass["light"]])
    print("In/Out: ", id2inout[imageclass["in_out"]])
    print("Test set: ", id2testset[imageclass["test_set"]])

print_info(imageclasslist[2])

Filename:  2015_00003.png
Class:  Bicycle
Light:  weak
In/Out:  outdoor
Test set:  train


In [80]:
filename2set = {}
for imageclass in imageclasslist:
    filename2set[imageclass["filename"]] = id2testset[imageclass["test_set"]]

print(filename2set)

{'2015_00001.png': 'train', '2015_00002.png': 'train', '2015_00003.png': 'train', '2015_00004.jpg': 'train', '2015_00005.jpg': 'train', '2015_00006.jpg': 'train', '2015_00007.jpg': 'train', '2015_00008.jpg': 'train', '2015_00009.jpg': 'train', '2015_00010.jpg': 'train', '2015_00011.jpg': 'train', '2015_00012.jpg': 'train', '2015_00013.jpg': 'train', '2015_00014.jpg': 'train', '2015_00015.jpg': 'train', '2015_00016.jpg': 'train', '2015_00017.jpg': 'train', '2015_00018.jpg': 'train', '2015_00019.jpg': 'train', '2015_00020.jpg': 'train', '2015_00021.jpg': 'train', '2015_00022.jpg': 'train', '2015_00023.jpg': 'train', '2015_00024.JPG': 'train', '2015_00025.jpg': 'train', '2015_00026.jpg': 'train', '2015_00027.jpg': 'train', '2015_00028.jpg': 'train', '2015_00029.jpg': 'train', '2015_00030.jpg': 'train', '2015_00031.jpg': 'train', '2015_00032.jpg': 'train', '2015_00033.jpg': 'train', '2015_00034.jpg': 'train', '2015_00035.jpg': 'train', '2015_00036.jpg': 'train', '2015_00037.jpg': 'train', 

In [81]:
# Copy images to the output directory folders, split into train, val, test

train_path = os.path.join(output_path, "images", "train")
val_path = os.path.join(output_path, "images", "val")
test_path = os.path.join(output_path, "images", "test")

if not os.path.exists(train_path):
    os.makedirs(train_path)
    print("Created directory: ", train_path)
else:
    print("Directory already exists: ", train_path)

if not os.path.exists(val_path):
    os.makedirs(val_path)
    print("Created directory: ", val_path)
else:
    print("Directory already exists: ", val_path)

if not os.path.exists(test_path):
    os.makedirs(test_path)
    print("Created directory: ", test_path)
else:
    print("Directory already exists: ", test_path)



Directory already exists:  ../../yolo-testing/datasets/exDark-yolo/images/train
Directory already exists:  ../../yolo-testing/datasets/exDark-yolo/images/val
Directory already exists:  ../../yolo-testing/datasets/exDark-yolo/images/test


In [82]:
import shutil

for imageclass in imageclasslist:
    source = os.path.join(dataset_path, "images", id2cls[imageclass["class"]], imageclass["filename"])
    dest = os.path.join(output_path, "images", id2testset[imageclass["test_set"]], imageclass["filename"])
    if not os.path.exists(dest):
        shutil.copy(source, dest)

In [83]:
import glob
# Fix labels for the images
labels_list = []
for value in id2cls.values():
    paths = glob.glob(os.path.join(dataset_path, "labels", value, "*.txt"))
    for path in paths:
        with open(path, "r") as f:
            lines = f.readlines()
            labels_list.append({"filename": os.path.basename(path), "annotations": lines})

print("len", len(labels_list))
print(labels_list[0])

len 7363
{'filename': '2015_00092.jpg.txt', 'annotations': ['% bbGt version=3\n', 'Bicycle 298 217 88 117 0 0 0 0 0 0 0\n', 'Car 211 174 64 33 0 0 0 0 0 0 0\n', 'People 134 174 30 29 0 0 0 0 0 0 0\n']}


In [84]:
cls2yoloid = {v: int(k) - 1 for k, v in id2cls.items()}
print(cls2yoloid)

{'Bicycle': 0, 'Boat': 1, 'Bottle': 2, 'Bus': 3, 'Car': 4, 'Cat': 5, 'Chair': 6, 'Cup': 7, 'Dog': 8, 'Motorbike': 9, 'People': 10, 'Table': 11}


In [89]:

def convert_annotation(annotation: str, filename: str):
    from PIL import Image

    annotation = annotation.strip().split(" ")
    cls = cls2yoloid[annotation[0]]

    image_filename = filename[:-4]
    test_set = "val" if image_filename == "2015_00391.jpg" else filename2set[image_filename]
    image_path = os.path.join(output_path, "images", test_set, image_filename)
    if not os.path.exists(image_path):
        print("Warning: image not found: ", image_path)

    image_width, image_height = Image.open(image_path).size
    print(image_width, image_height)

    l = float(annotation[1]) # pixel number from left
    if l < 0 or l > image_width:
        print(annotation)
        print("Warning: l is out of bounds: ", l)

    t = float(annotation[2]) # pixel number from top
    if t < 0 or t > image_height:
        print("Warning: t is out of bounds: ", t)

    w = float(annotation[3]) # width of the bounding box
    if w < 0 or w > image_width:
        print("Warning: w is out of bounds: ", w)

    h = float(annotation[4]) # height of the bounding box
    if h < 0 or h > image_height:
        print("Warning: h is out of bounds: ", h)

    # Convert to YOLO format
    x = (l + w / 2) / image_width
    y = (t + h / 2) / image_height
    w = w / image_width
    h = h / image_height
    return f"{cls} {x:.4f} {y:.4f} {w:.4f} {h:.4f}"

    
def convert_annotations(annotations: list, filename: str):
    return "\n".join([convert_annotation(a, filename) for a in annotations if not a.startswith("%")])

yolo_annotations = []
for labels in labels_list:
    filename = labels["filename"]
    annotations = convert_annotations(labels["annotations"], filename)
    yolo_annotations.append({"filename": filename, "annotations": annotations})

print(yolo_annotations[0])
print(len(yolo_annotations))



507 337
507 337
507 337
500 374
500 374
500 374
339 506
339 506
339 506
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
500 375
338 507
338 507
338 507
338 507
338 507
640 480
640 480
640 480
2448 2448
2448 2448
729 1097
576 432
576 432
576 432
576 432
576 432
576 432
576 432
576 432
576 432
640 480
640 480
640 480
640 480
640 480
640 480
640 427
640 480
640 480
640 426
640 426
640 426
640 426
640 426
640 426
640 426
640 426
640 426
640 426
640 426
480 357
480 357
638 344
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
480 640
640 480
640 480
640 480
640 427
640 427
640 427
1024 683
1024 683
683 1024
683 1024
498 750
498 750
498 750
335 500
335 500
335 500
867 522
867 522
767 536
767 536
767 536
640 360
640 360
337 506
337 506
337 506
337 506
337 506
337 506
337 506
337 506
337 506
337 506
720 720
720 720
720 720

In [90]:
# Save the annotations to the output directory
train_path = os.path.join(output_path, "labels", "train")
val_path = os.path.join(output_path, "labels", "val")
test_path = os.path.join(output_path, "labels", "test")

if not os.path.exists(train_path):
    os.makedirs(train_path)
    print("Created directory: ", train_path)

if not os.path.exists(val_path):
    os.makedirs(val_path)
    print("Created directory: ", val_path)

if not os.path.exists(test_path):
    os.makedirs(test_path)
    print("Created directory: ", test_path)

for annotation in yolo_annotations:
    image_name = annotation["filename"][:-4]
    test_set = "val" if image_name == "2015_00391.jpg" else filename2set[image_name]
    new_filename = annotation["filename"].replace(".jpg", "")
    new_filename = new_filename.replace(".JPG", "")
    new_filename = new_filename.replace(".png", "")
    new_filename = new_filename.replace(".JPEG", "")
    dest = os.path.join(output_path, "labels", test_set, new_filename)
    if not os.path.exists(dest):
        with open(dest, "w") as f:
            f.write(annotation["annotations"])

Created directory:  ../../yolo-testing/datasets/exDark-yolo/labels/train
Created directory:  ../../yolo-testing/datasets/exDark-yolo/labels/val
Created directory:  ../../yolo-testing/datasets/exDark-yolo/labels/test
