In [3]:
"""
bdd_to_coco.py

Convert a BDD100K subset JSON (object detection only)
to COCO format for YOLOv8 or Detectron2 training.
"""

import json
import os
from tqdm import tqdm

# -----------------------------
# CONFIGURATION
# -----------------------------
# Input: label file of the BDD100K subset.
BDD_JSON = "../bdd100k_subset_1k/labels.json"
# Output: COCO-format annotation file written by this script.
COCO_JSON = "../bdd100k_subset_1k/coco_annotations.json"
# Image folder (intended for looking up width/height if needed).
# NOTE(review): this path starts with "./" while the two JSON paths start
# with "../", and nothing in the visible code reads IMG_DIR - confirm.
IMG_DIR = "./bdd100k_subset_1k/images"

# BDD100K detection classes. The list order determines the COCO category
# ids assigned by bdd_to_coco (first entry -> id 1).
CLASSES = [
    "car",
    "traffic sign",
    "traffic light",
    "person",
    "truck",
    "bus",
    "bike",
    "rider",
    "motor",
    "train",
]
# Label categories that are skipped during conversion.
EXCLUDE_CATS = {"drivable area", "lane"}

# -----------------------------
# HELPER FUNCTIONS
# -----------------------------
def bdd_to_coco(bdd_data, classes, *, img_width=1280, img_height=720):
    """Convert BDD-style detection labels into a COCO-format dictionary.

    Args:
        bdd_data: Sequence of per-image dicts. Each entry must have a
            "name" key and may have a "labels" list whose items carry a
            "category" string and a "box2d" dict with "x1", "y1", "x2",
            "y2" corner coordinates.
        classes: Ordered class names to keep. The i-th name (0-based) is
            assigned COCO category id ``i + 1``.
        img_width: Width written into every image entry. Defaults to 1280,
            the value this function previously hard-coded.
        img_height: Height written into every image entry. Defaults to 720,
            the value this function previously hard-coded.

    Returns:
        A dict with "images", "annotations" and "categories" lists. Image
        and annotation ids are 1-based and assigned in input order.

    Raises:
        KeyError: If an image entry has no "name", or a non-empty "box2d"
            lacks one of its four coordinates.
    """
    class_to_id = {cls: i + 1 for i, cls in enumerate(classes)}

    coco = {
        "images": [],
        "annotations": [],
        "categories": [
            {"id": cid, "name": cls} for cls, cid in class_to_id.items()
        ],
    }
    ann_id = 1

    progress = tqdm(bdd_data, desc="Converting BDD → COCO")
    for img_id, item in enumerate(progress, start=1):
        coco["images"].append({
            "id": img_id,
            "file_name": item["name"],
            "width": img_width,
            "height": img_height,
        })

        # "labels" may be absent or explicitly null for frames without
        # objects; treat both as "no labels" instead of iterating None.
        for lbl in item.get("labels") or []:
            # A null / non-string category can never match a class name,
            # so normalise it rather than crashing on .lower().
            cat = str(lbl.get("category") or "").lower()
            if cat in EXCLUDE_CATS or cat not in class_to_id:
                continue

            # Skip labels without a box, including an explicit null box2d.
            box = lbl.get("box2d")
            if not box:
                continue

            x1, y1, x2, y2 = box["x1"], box["y1"], box["x2"], box["y2"]
            w, h = x2 - x1, y2 - y1

            # Degenerate or inverted boxes are dropped.
            if w <= 0 or h <= 0:
                continue

            coco["annotations"].append({
                "id": ann_id,
                "image_id": img_id,
                "category_id": class_to_id[cat],
                "bbox": [x1, y1, w, h],  # COCO layout: [x, y, width, height]
                "area": w * h,
                "iscrowd": 0,
                "segmentation": [],  # not used for detection
            })
            ann_id += 1

    return coco


# -----------------------------
# MAIN
# -----------------------------
if __name__ == "__main__":
    # Script entry point: load the BDD subset labels, convert them with
    # bdd_to_coco, and write the result to COCO_JSON.
    print(f"📂 Loading BDD subset JSON: {BDD_JSON}")
    # Read as UTF-8 explicitly instead of relying on the platform default
    # text encoding.
    with open(BDD_JSON, "r", encoding="utf-8") as f:
        bdd_data = json.load(f)

    print(f"Loaded {len(bdd_data)} image entries.")

    print("🔄 Converting to COCO format...")
    coco_data = bdd_to_coco(bdd_data, CLASSES)

    # os.path.dirname returns "" for a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(COCO_JSON)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(COCO_JSON, "w", encoding="utf-8") as f:
        json.dump(coco_data, f, indent=2)

    print(f"\n✅ Saved COCO-format annotations to: {COCO_JSON}")
    print(f"Images: {len(coco_data['images'])}, Annotations: {len(coco_data['annotations'])}")

📂 Loading BDD subset JSON: ../bdd100k_subset_1k/labels.json
Loaded 1000 image entries.
🔄 Converting to COCO format...


Converting BDD → COCO: 100%|█| 1000/1000 [00:00



✅ Saved COCO-format annotations to: ../bdd100k_subset_1k/coco_annotations.json
Images: 1000, Annotations: 18551
