In [8]:
import ultralytics
ultralytics.checks()
import json
import os
from sklearn.model_selection import train_test_split
import shutil
from ultralytics import YOLO

Ultralytics 8.3.146  Python-3.11.9 torch-2.6.0+cu118 CUDA:0 (GeForce GTX 1650, 4096MiB)
Setup complete  (12 CPUs, 15.9 GB RAM, 136.9/931.5 GB disk)


In [None]:
# Data setup:
# 1. Download the dataset from https://datasetninja.com/mju-waste#download and place it in the folder 'data/mju-waste-COCO'.
# 2. Clone https://github.com/realwecan/mju-waste and copy its instances .json annotation files into the folder 'data/mju-waste-COCO/annotations'.

In [3]:
def coco_to_yolo(coco_json_path, image_dir, output_label_dir):
    """
    Convert COCO-format annotations to YOLO format for given dataset layout.

    One ``<image base name>.txt`` file is written per image that has at least
    one annotation. Each line is ``class_id x_center y_center width height``
    with all four box values normalized by the image size. Class ids are the
    positions of the categories in the COCO ``categories`` list, so they are
    contiguous and start at 0 regardless of the original COCO category ids.

    Annotations flagged ``iscrowd`` are skipped. Boxes are clipped to the
    image bounds, and boxes with no area left after clipping are dropped, so
    every emitted value lies in [0, 1] as the YOLO label format requires.
    An image whose annotations are all skipped still gets an (empty) file.

    Args:
        coco_json_path (str): Path to COCO .json file (e.g., test/ann/instances_test.json)
        image_dir (str): Path to images (e.g., test/img/). Not read by the
            conversion (image sizes come from the JSON); kept so existing
            callers keep working.
        output_label_dir (str): Output directory for YOLO .txt files (e.g., test/labels/)

    Raises:
        KeyError: If an annotation references an image id or category id that
            is not declared in the JSON file.
    """
    os.makedirs(output_label_dir, exist_ok=True)

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)

    image_map = {img['id']: img for img in coco['images']}
    category_id_to_class_id = {cat['id']: i for i, cat in enumerate(coco['categories'])}

    # Group annotations per image
    annotations_per_image = {}
    for ann in coco['annotations']:
        annotations_per_image.setdefault(ann['image_id'], []).append(ann)

    for image_id, anns in annotations_per_image.items():
        image_info = image_map[image_id]
        img_w, img_h = image_info['width'], image_info['height']
        base_name = os.path.splitext(image_info['file_name'])[0]
        yolo_label_path = os.path.join(output_label_dir, base_name + ".txt")

        lines = []
        for ann in anns:
            if ann.get("iscrowd", 0):
                continue

            # COCO boxes are [x_min, y_min, width, height] in pixels.
            x, y, w, h = ann["bbox"]

            # Clip to the image so normalized values cannot leave [0, 1].
            x_min = min(max(x, 0), img_w)
            y_min = min(max(y, 0), img_h)
            x_max = min(max(x + w, 0), img_w)
            y_max = min(max(y + h, 0), img_h)
            box_w = x_max - x_min
            box_h = y_max - y_min

            # Nothing left inside the image (or a zero-area annotation).
            if box_w <= 0 or box_h <= 0:
                continue

            x_center = (x_min + box_w / 2) / img_w
            y_center = (y_min + box_h / 2) / img_h
            w_norm = box_w / img_w
            h_norm = box_h / img_h
            class_id = category_id_to_class_id[ann["category_id"]]
            lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}")

        with open(yolo_label_path, "w") as f:
            f.write("\n".join(lines))

    print(f"✅ Converted COCO to YOLO: {len(annotations_per_image)} files written to {output_label_dir}")


In [5]:
# Convert every dataset split in turn; the three splits share one directory
# layout, so only the split name varies between calls.
for split in ("test", "train", "val"):
    coco_to_yolo(
        coco_json_path=f"../data/mju-waste-COCO/annotations/{split}.json",
        image_dir=f"../data/mju-waste-COCO/{split}/img",
        output_label_dir=f"../data/mju-YOLO/{split}/labels",
    )

✅ Converted COCO to YOLO: 742 files written to ../data/mju-YOLO/test/labels
✅ Converted COCO to YOLO: 1477 files written to ../data/mju-YOLO/train/labels
✅ Converted COCO to YOLO: 248 files written to ../data/mju-YOLO/val/labels


In [18]:
yaml_path = 'data.yaml'

# NOTE(review): every path in this cell is relative to the current working
# directory (os.path.abspath resolves against it). Presumably the working
# directory is the YOLO dataset root when this cell runs - confirm.

# Read the categories before touching data.yaml, so a missing or broken
# annotation file cannot leave a half-written YAML behind. The context
# manager also closes the JSON file handle.
with open('../mju-waste-COCO/annotations/train.json', 'r') as ann_file:
    categories = json.load(ann_file)['categories']

# Create data.yaml
with open(yaml_path, 'w') as f:
    f.write(f"test: {os.path.abspath('test/images')}\n")
    f.write(f"train: {os.path.abspath('train/images')}\n")
    f.write(f"val: {os.path.abspath('val/images')}\n")
    # Number of classes, derived from the same list as `names` so the two
    # entries can never disagree.
    f.write(f"nc: {len(categories)}\n")
    f.write("names: " + str([cat['name'] for cat in categories]) + "\n")
