# In [None]:
import os
import cv2
import numpy as np
from PIL import Image
import yaml

def convert_instance_mask_to_yolo_txt(mask_path, image_shape, output_path, class_id=0, epsilon_ratio=0.01):
    """Convert an instance-ID mask image into a YOLO segmentation label file.

    Every distinct non-zero value in the mask is treated as one instance
    (value 0 is skipped). The external contours of each instance are
    simplified into polygons, normalised by the image size and written as
    one ``class_id x1 y1 x2 y2 ...`` row per polygon.

    Args:
        mask_path: Path of the mask image to read.
        image_shape: ``(height, width)`` used to normalise coordinates.
        output_path: Path of the ``.txt`` label file to write. The file is
            created (empty) even when no polygon is found.
        class_id: Class index written at the start of every row.
        epsilon_ratio: Polygon simplification tolerance, as a fraction of
            each contour's perimeter.
    """
    # Read inside a context manager so the image file handle is released
    # as soon as the pixel data has been copied into the array.
    with Image.open(mask_path) as mask_image:
        mask = np.array(mask_image)
    h, w = image_shape
    # Contour points are (x, y), so x is scaled by width and y by height.
    scale = np.array([w, h])

    instance_ids = np.unique(mask)
    instance_ids = instance_ids[instance_ids != 0]

    lines = []
    for inst_id in instance_ids:
        inst_mask = (mask == inst_id).astype(np.uint8)
        contours, _ = cv2.findContours(inst_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        for contour in contours:
            # Skip tiny specks before spending time on simplification.
            if len(contour) < 5:
                continue
            epsilon = epsilon_ratio * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True).reshape(-1, 2)
            # Simplification can collapse a thin contour to one or two
            # points; a polygon needs at least three vertices to be a
            # valid segmentation row.
            if len(approx) < 3:
                continue
            flat = (approx / scale).flatten().tolist()
            coords = " ".join(f"{value:.6f}" for value in flat)
            lines.append(f"{class_id} {coords}")

    with open(output_path, "w") as f:
        f.write("\n".join(lines))

def generate_yolo_dataset(image_dir, mask_dir, label_dir, yaml_path, class_name="GobletCell"):
    """Build YOLO segmentation labels and a dataset YAML from image/mask pairs.

    For every ``.png`` file in ``image_dir`` that has a mask of the same
    file name in ``mask_dir``, a label file with the same stem and a
    ``.txt`` extension is written to ``label_dir``. A single-class dataset
    description is then written to ``yaml_path``.

    Args:
        image_dir: Directory containing the ``.png`` images.
        mask_dir: Directory containing instance masks named like the images.
        label_dir: Output directory for label files (created if missing).
        yaml_path: Path of the dataset YAML file to write.
        class_name: Name of the single class stored in the YAML.
    """
    os.makedirs(label_dir, exist_ok=True)
    image_files = [f for f in os.listdir(image_dir) if f.endswith(".png")]

    converted = 0
    for fname in image_files:
        image_path = os.path.join(image_dir, fname)
        mask_path = os.path.join(mask_dir, fname)
        # splitext swaps only the extension; str.replace would also rewrite
        # a ".png" that happens to appear earlier in the file name.
        label_path = os.path.join(label_dir, os.path.splitext(fname)[0] + ".txt")

        if not os.path.exists(mask_path):
            print(f"[Missing mask] {fname}")
            continue

        img = cv2.imread(image_path)
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising, so check before touching .shape.
        if img is None:
            print(f"[Unreadable image] {fname}")
            continue

        h, w = img.shape[:2]
        convert_instance_mask_to_yolo_txt(mask_path, (h, w), label_path)
        converted += 1

    # Resolve to an absolute, normalised path first so a trailing slash on
    # image_dir does not yield an empty basename / wrong parent directory.
    abs_image_dir = os.path.abspath(image_dir)
    data_yaml = {
        "path": os.path.dirname(abs_image_dir),
        "train": os.path.basename(abs_image_dir),
        "val": os.path.basename(abs_image_dir),  # adjust if separate val set
        "names": [class_name],
        "nc": 1
    }

    with open(yaml_path, "w") as f:
        # The stream must be passed explicitly; without it yaml.dump only
        # returns a string and the file is left empty.
        yaml.dump(data_yaml, f)

    print(f"YOLO dataset prepared with {converted} images. YAML saved to {yaml_path}")

# Example invocation: replace each "insert_path" placeholder with a real
# location before running.
_example_paths = {
    "image_dir": "insert_path",
    "mask_dir": "insert_path",
    "label_dir": "insert_path",
    "yaml_path": "insert_path",
}
generate_yolo_dataset(**_example_paths)