In [None]:
import json
import os
import shutil

def bbox_to_yolo(image_width, image_height, bbox):
    """Convert a two-corner bounding box to normalized YOLO coordinates.

    Args:
        image_width: Width of the image, in the same units as the bbox
            x coordinates.
        image_height: Height of the image, in the same units as the bbox
            y coordinates.
        bbox: Sequence whose first two items are the ``(x, y)`` corner
            points of the box, i.e. ``[[x1, y1], [x2, y2]]``. The two
            corners may be given in either order.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` where the x values
        are divided by ``image_width`` and the y values by ``image_height``.
    """
    x1, y1 = bbox[0]
    x2, y2 = bbox[1]

    x_center = (x1 + x2) / 2
    y_center = (y1 + y2) / 2

    # abs() keeps the size non-negative when the corners are not ordered
    # top-left -> bottom-right; the centre is unaffected by corner order.
    w = abs(x2 - x1)
    h = abs(y2 - y1)

    return (
        x_center / image_width,
        y_center / image_height,
        w / image_width,
        h / image_height,
    )

def convert_annotations(input_labels_dir, input_images_dir, output_dir):
    """Convert JSON annotation files to YOLO label files and copy their images.

    For every ``<id>.json`` file in ``input_labels_dir``, the annotations
    whose ``object_class`` is ``"garbage_bag"`` are converted with
    ``bbox_to_yolo`` and written to ``<output_dir>/labels/<id>.txt``, one
    ``0 x_center y_center width height`` line per box (the class index is
    always 0). The matching ``<input_images_dir>/<id>.jpg`` is copied to
    ``<output_dir>/images/<id>.jpg``.

    Annotation files that contain no ``"garbage_bag"`` box are reported on
    stdout and skipped: neither a label file nor an image is written for
    them. Directory entries that do not end in ``.json`` are ignored.

    Args:
        input_labels_dir: Directory containing the ``.json`` annotation files.
        input_images_dir: Directory containing the ``.jpg`` images, named
            after the annotation files.
        output_dir: Destination root; its ``labels`` and ``images``
            sub-directories are created if missing.

    Errors from unreadable or malformed annotation files and from missing
    images are not caught and propagate to the caller.
    """
    output_labels_dir = os.path.join(output_dir, "labels")
    output_images_dir = os.path.join(output_dir, "images")
    os.makedirs(output_labels_dir, exist_ok=True)
    os.makedirs(output_images_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # processing order (and the printed report) reproducible.
    for label_file_name in sorted(os.listdir(input_labels_dir)):
        # Skip stray entries such as .DS_Store or checkpoint folders; without
        # this guard they would be looked up as "<name>.json" and crash.
        if not label_file_name.endswith(".json"):
            continue

        file_id = label_file_name.removesuffix(".json")
        input_label_path = os.path.join(input_labels_dir, label_file_name)
        input_image_path = os.path.join(input_images_dir, file_id + ".jpg")
        output_label_path = os.path.join(output_labels_dir, file_id + ".txt")
        output_image_path = os.path.join(output_images_dir, file_id + ".jpg")

        # convert (the context manager closes the annotation file promptly)
        with open(input_label_path, "r") as label_file:
            label = json.load(label_file)
        width = label["images"]["width"]
        height = label["images"]["height"]
        yolos = [
            bbox_to_yolo(width, height, annotation["bbox"])
            for annotation in label["annotations"]
            if annotation["object_class"] == "garbage_bag"
        ]

        # exception case: nothing to label, so emit neither image nor label
        if not yolos:
            print(f'Object does not exist in "{label_file_name}"')
            continue

        # image (copied first, so a missing image never leaves an orphan label)
        shutil.copy(src=input_image_path, dst=output_image_path)

        # label
        with open(output_label_path, "w") as f:
            for yolo in yolos:
                f.write(f"0 {yolo[0]} {yolo[1]} {yolo[2]} {yolo[3]}\n")

In [2]:
# Raw dataset: JSON annotations and JPEG images for the training split ...
training_labels_dir = "datasets/raw_dataset/train/labels"
training_images_dir = "datasets/raw_dataset/train/images"

# ... and for the validation split.
validation_labels_dir = "datasets/raw_dataset/val/labels"
validation_images_dir = "datasets/raw_dataset/val/images"

# Destination root for the converted dataset. Each split is written with
# sibling "images" and "labels" folders because YOLOv5 locates labels
# automatically for each image by replacing the last instance of /images/
# in each image path with /labels/.
output_dir = "datasets/yolo_dataset"

In [3]:
# Convert the training split; results go to <output_dir>/train/{images,labels}.
convert_annotations(
    training_labels_dir,
    training_images_dir,
    os.path.join(output_dir, "train"),
)

Object does not exist in "9_erip_su_11-10_12-56-56_aft_DF5.json"
Object does not exist in "9_erip_su_11-10_12-57-04_aft_DF5.json"
Object does not exist in "9_ydsp_su_11-11_10-43-42_for_DF5.json"
Object does not exist in "9_ydsp_su_11-11_10-43-58_for_DF5.json"
Object does not exist in "9_ydsp_su_11-11_10-43-52_for_DF5.json"
Object does not exist in "9_ydsp_su_11-11_10-42-56_for_DF5.json"
Object does not exist in "9_erip_su_11-10_12-57-00_aft_DF5.json"
Object does not exist in "9_ydsp_su_11-11_10-44-18_for_DF5.json"


In [4]:
# Convert the validation split; results go to <output_dir>/val/{images,labels}.
convert_annotations(
    validation_labels_dir,
    validation_images_dir,
    os.path.join(output_dir, "val"),
)

Object does not exist in "9_ydsp_su_11-11_10-43-04_for_DF5.json"
Object does not exist in "9_erip_su_11-10_12-09-00_aft_DF5.json"
Object does not exist in "9_ydsp_su_11-11_10-44-04_for_DF5.json"
