In [2]:
import os
import shutil
import yaml

CURRENT_DIR = os.path.join(os.getcwd(), "generate_datasets") # yolov8_training/notebooks/generate_datasets
print("Current directory: {}".format(CURRENT_DIR))
BASE_DIR = os.path.abspath(os.path.join(CURRENT_DIR, "..", "..", "..")) # yolov8_training
print("Base directory: {}".format(BASE_DIR))
DATASETS_DIR = os.path.join(BASE_DIR, "datasets") # yolov8_training/datasets
print("Dataset directory: {}".format(DATASETS_DIR))

os.makedirs(DATASETS_DIR, exist_ok=True)

RAW_CLASSES_DIR = os.path.join(DATASETS_DIR, "raw_all_classes") # yolov8_training/datasets/raw_all_classes
print("Raw classes directory: {}".format(RAW_CLASSES_DIR))
BINARY_DIR = os.path.join(DATASETS_DIR, "binary") # yolov8_training/datasets/binary
print("Binary directory: {}".format(BINARY_DIR))

os.makedirs(BINARY_DIR, exist_ok=True)

for split in ["train", "val", "test"]:
    for sub in ["images", "labels"]:
        os.makedirs(os.path.join(BINARY_DIR, split, sub), exist_ok=True)

with open(os.path.join(RAW_CLASSES_DIR, "data.yaml"), "r") as f:
    raw_data = yaml.safe_load(f)

binary_names = {0: "coin", 1: "note"}

def map_label_to_binary(label_path):
    lines = []
    with open(label_path, "r") as f:
        for line in f:
            cls, *rest = line.strip().split()
            cls = int(cls)
            name = raw_data["names"][cls]
            if "coin" in name.lower():
                lines.append(f"0 {' '.join(rest)}\n")
            elif "note" in name.lower():
                lines.append(f"1 {' '.join(rest)}\n")
    return lines

for split in ["train", "valid", "test"]:
    raw_images = os.path.join(RAW_CLASSES_DIR, split, "images")
    raw_labels = os.path.join(RAW_CLASSES_DIR, split, "labels")
    out_split = "val" if split=="valid" else split
    out_images = os.path.join(BINARY_DIR, out_split, "images")
    out_labels = os.path.join(BINARY_DIR, out_split, "labels")
    for fname in os.listdir(raw_labels):
        label_path = os.path.join(raw_labels, fname)
        mapped_lines = map_label_to_binary(label_path)
        if not mapped_lines: continue
        shutil.copy(os.path.join(raw_images, fname.replace(".txt", ".jpg")), out_images)
        with open(os.path.join(out_labels, fname), "w") as f:
            f.writelines(mapped_lines)

binary_yaml = {
    "train": "train/images",
    "val": "val/images",
    "test": "test/images",
    "nc": 2,
    "names": binary_names
}

with open(os.path.join(BINARY_DIR, "data.yaml"), "w") as f:
    yaml.dump(binary_yaml, f)

print("✅ Binary dataset generated.")

Current directory: D:\CurrencyDetectorApp\yolov8_training\notebooks\generate_datasets\generate_datasets
Base directory: D:\CurrencyDetectorApp\yolov8_training
Dataset directory: D:\CurrencyDetectorApp\yolov8_training\datasets
Raw classes directory: D:\CurrencyDetectorApp\yolov8_training\datasets\raw_all_classes
Binary directory: D:\CurrencyDetectorApp\yolov8_training\datasets\binary
✅ Binary dataset generated.
