# Prepare Labellerr COCO dataset for YOLOv8-seg (easy notebook)

This notebook helps you convert a single COCO JSON file (`export_coco.json`) and a folder with images into a `dataset/` layout ready for YOLOv8 segmentation training.

**How to use (easy):**
1. Upload your `export_coco.json` and your images folder to Google Drive.
2. Edit the two path variables in the first code cell.
3. Run the cells in order, from top to bottom (select `Runtime -> Change runtime type -> GPU` first if you plan to train afterwards).



In [None]:
# Quick fix: set paths and check files. Edit the two paths below.
import os   # imported first: the existence checks below need it on a fresh kernel

from google.colab import drive
drive.mount('/content/drive', force_remount=True)   # mounts your Drive

COCO_JSON_PATH = "/content/drive/MyDrive/export_coco.json"   # <- change if your json is elsewhere
IMAGES_SRC_DIR  = "/content/drive/MyDrive/cars_persons_images"  # <- change to folder with your images

OUTPUT_ROOT = "/content/dataset_coco_labellerr_easy"

print("COCO JSON exists?", os.path.exists(COCO_JSON_PATH))
print("Images dir exists?", os.path.exists(IMAGES_SRC_DIR))
# list first 10 image files to verify
# Only list the folder when it is really there; otherwise os.listdir() would raise
# right after the line above already reported that the folder is missing.
if os.path.isdir(IMAGES_SRC_DIR):
    imgs = [f for f in os.listdir(IMAGES_SRC_DIR) if f.lower().endswith(('.jpg','.jpeg','.png'))]
else:
    imgs = []
print("Number of images found:", len(imgs))
print("First 10 image names:", imgs[:10])


MessageError: Error: credential propagation was unsuccessful

In [None]:
# Mount Drive (when running on Colab) and set paths - EDIT these two paths only
import os

try:
    from google.colab import drive
except ImportError:
    # Not running on Colab (e.g. a local Jupyter on Windows): there is no Drive to mount.
    drive = None
if drive is not None:
    drive.mount('/content/drive')

# === EDIT THESE PATHS ===
# Windows paths must be raw strings (r"..."): in a normal string literal "\U" starts a
# unicode escape, which makes the whole cell fail with a SyntaxError.
# NOTE: a Colab runtime cannot see local drives such as C:\ or E:\ - when running on
# Colab, upload the files to Google Drive and use /content/drive/MyDrive/... paths.
COCO_JSON_PATH = r"C:\Users\User\Downloads\export-#VIrgMxUgk9LN2pH0qNwN (1).json"   # path to your COCO JSON from Labellerr
IMAGES_SRC_DIR  = r"E:\labler"  # folder containing all images (.jpg/.png)
# ========================

OUTPUT_ROOT = "/content/dataset_coco_labellerr_easy"
print("COCO_JSON_PATH:", COCO_JSON_PATH)
print("IMAGES_SRC_DIR:", IMAGES_SRC_DIR)
print("OUTPUT_ROOT:", OUTPUT_ROOT)

# Fail early and readably instead of with a FileNotFoundError in a later cell.
if not os.path.isfile(COCO_JSON_PATH):
    print("WARNING: COCO JSON not found from this runtime:", COCO_JSON_PATH)
if not os.path.isdir(IMAGES_SRC_DIR):
    print("WARNING: images folder not found from this runtime:", IMAGES_SRC_DIR)


In [None]:
# Install small dependencies
!pip install -q tqdm pycocotools ultralytics
print('Installed tqdm, pycocotools, ultralytics')

In [None]:
# Create train/val/test splits, COCO jsons and YOLO segmentation labels (easy)
#
# Reads COCO_JSON_PATH / IMAGES_SRC_DIR / OUTPUT_ROOT set in an earlier cell and writes:
#   OUTPUT_ROOT/images/<split>/        copies of the images
#   OUTPUT_ROOT/labels/<split>/*.txt   YOLO segmentation labels (one file per image)
#   OUTPUT_ROOT/annotations/<split>.json   COCO JSON restricted to that split
import json, os, shutil, random
from pathlib import Path
from tqdm import tqdm

random.seed(42)

TRAIN_RATIO = 0.7
VAL_RATIO   = 0.15
TEST_RATIO  = 0.15

# The test split receives whatever is left after train and val, so TEST_RATIO is only
# honoured when the three ratios add up to 1.
if abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) > 1e-9:
    raise SystemExit("TRAIN_RATIO + VAL_RATIO + TEST_RATIO must add up to 1.")

# Load COCO JSON
with open(COCO_JSON_PATH, 'r', encoding='utf-8') as f:
    coco = json.load(f)

images_info = coco.get('images', [])
anns = coco.get('annotations', [])
categories = coco.get('categories', [])

if len(images_info) == 0:
    raise SystemExit("No images in JSON. Make sure your COCO JSON contains 'images' entries.")

img_by_file = {im['file_name']: im for im in images_info}
# sorted(): os.listdir() returns names in arbitrary order, so without sorting the seeded
# shuffle below would still produce a different split from one machine/run to the next.
src_files = sorted(f for f in os.listdir(IMAGES_SRC_DIR) if f.lower().endswith(('.jpg','.jpeg','.png','.tiff')))

common_files = [f for f in src_files if f in img_by_file]
if len(common_files) == 0:
    raise SystemExit("No common filenames between JSON and images folder. Check names (case-sensitive).")

random.shuffle(common_files)
n = len(common_files)
n_train = int(n * TRAIN_RATIO)
n_val   = int(n * VAL_RATIO)
n_test  = n - n_train - n_val

train_files = common_files[:n_train]
val_files   = common_files[n_train:n_train+n_val]
test_files  = common_files[n_train+n_val:]
assert len(test_files) == n_test

print('Counts ->', len(train_files), 'train,', len(val_files), 'val,', len(test_files), 'test')
if not train_files or not val_files:
    # int() truncation leaves train and/or val empty when there are only a few images.
    print('WARNING: train or val split is empty - add more images or adjust the ratios.')

# YOLO class index = position of the COCO category id among the sorted category ids.
cat_id_to_class = {cid: idx for idx, cid in enumerate(sorted({c['id'] for c in categories}))}

# Group annotations per image once, so label writing does not rescan all annotations.
anns_by_image = {}
for a in anns:
    anns_by_image.setdefault(a['image_id'], []).append(a)

def yolo_seg_lines(img, img_anns):
    """Convert the COCO polygon annotations of one image into YOLO segmentation label lines.

    Each returned line is "<class> x1 y1 x2 y2 ..." with coordinates divided by the image
    width/height and clamped to [0, 1]. Annotations with an unknown category or a
    non-polygon (e.g. RLE dict) segmentation are skipped, as are polygons with fewer than
    3 points. An annotation made of several polygons yields one line per polygon.
    Returns an empty list when the image entry carries no usable width/height.
    """
    width, height = img.get('width'), img.get('height')
    if not width or not height:
        return []
    lines = []
    for a in img_anns:
        cls = cat_id_to_class.get(a.get('category_id'))
        seg = a.get('segmentation')
        if cls is None or not isinstance(seg, list):
            continue
        for poly in seg:
            if not isinstance(poly, (list, tuple)) or len(poly) < 6 or len(poly) % 2:
                continue
            coords = []
            for i in range(0, len(poly), 2):
                x = min(max(poly[i] / width, 0.0), 1.0)
                y = min(max(poly[i + 1] / height, 0.0), 1.0)
                coords.append(f'{x:.6f} {y:.6f}')
            lines.append(f'{cls} ' + ' '.join(coords))
    return lines

# create structure, copy files and write YOLO labels
# (YOLOv8 training reads labels/<split>/<image stem>.txt next to images/<split>/,
#  it does not read the COCO JSON files written further below.)
for split, files in [('train', train_files), ('val', val_files), ('test', test_files)]:
    dst_dir = Path(OUTPUT_ROOT) / 'images' / split
    lbl_dir = Path(OUTPUT_ROOT) / 'labels' / split
    dst_dir.mkdir(parents=True, exist_ok=True)
    lbl_dir.mkdir(parents=True, exist_ok=True)
    for fname in tqdm(files, desc=f'Copying {split}'):
        shutil.copy2(Path(IMAGES_SRC_DIR)/fname, dst_dir / fname)
        img = img_by_file[fname]
        label_lines = yolo_seg_lines(img, anns_by_image.get(img['id'], []))
        # An empty label file marks the image as containing no objects.
        with open(lbl_dir / (Path(fname).stem + '.txt'), 'w') as lf:
            lf.write(''.join(line + '\n' for line in label_lines))

def build_coco_for(split_files):
    """Return a COCO dict (images, annotations, categories) limited to the given file names."""
    imgs = [img_by_file[fname] for fname in split_files]
    img_ids = {img['id'] for img in imgs}
    anns_f = [a for a in anns if a['image_id'] in img_ids]
    return {"images": imgs, "annotations": anns_f, "categories": categories}

ann_dir = Path(OUTPUT_ROOT) / 'annotations'
ann_dir.mkdir(parents=True, exist_ok=True)

with open(ann_dir / 'train.json', 'w') as f:
    json.dump(build_coco_for(train_files), f)
with open(ann_dir / 'val.json', 'w') as f:
    json.dump(build_coco_for(val_files), f)
with open(ann_dir / 'test.json', 'w') as f:
    json.dump(build_coco_for(test_files), f)

print('Wrote split JSONs to', ann_dir)
print('Wrote YOLO segmentation labels to', Path(OUTPUT_ROOT) / 'labels')


In [None]:
# Write data.yaml in the dataset format expected by Ultralytics YOLOv8
from pathlib import Path
import yaml

out = Path(OUTPUT_ROOT)
out.mkdir(parents=True, exist_ok=True)

cats = {c['id']: c['name'] for c in categories}
# Class index i is the category with the i-th smallest COCO id.
sorted_cats = [cats[k] for k in sorted(cats.keys())]

# Ultralytics expects train/val/test to be image directories (relative to "path"), not
# COCO JSON files; it looks up the labels of each image in the parallel
# labels/<split>/ directory (same file stem, .txt extension).
data_yaml = {
    "path": str(out),
    "train": "images/train",
    "val":   "images/val",
    "test":  "images/test",
    "nc": len(sorted_cats),
    "names": sorted_cats
}
with open(out / 'data.yaml', 'w') as f:
    # sort_keys=False keeps the keys in the order written above.
    yaml.safe_dump(data_yaml, f, sort_keys=False)
print('Wrote data.yaml at', out / 'data.yaml')
print(data_yaml)

In [None]:
# Quick verify
!ls -R /content/dataset_coco_labellerr_easy | sed -n '1,200p'

In [None]:
# Example: start a short training run (uncomment to run)
# The three commented lines below load the 'yolov8n-seg.pt' checkpoint and train it for
# 10 epochs at image size 640 with batch size 8, using the data.yaml under
# /content/dataset_coco_labellerr_easy (update that path if OUTPUT_ROOT was changed).
# from ultralytics import YOLO
# model = YOLO('yolov8n-seg.pt')
# model.train(data='/content/dataset_coco_labellerr_easy/data.yaml', epochs=10, imgsz=640, batch=8, name='labellerr_easy_test')
# As long as the block above stays commented out, this cell only prints a reminder.
print('If you want to train, uncomment the training block and run this cell (make sure GPU runtime).')