In [20]:
import os
import shutil
import yaml

CURRENT_DIR = os.path.join(os.getcwd(), "generate_datasets") # yolov8_training/notebooks/generate_datasets
print("Current directory: {}".format(CURRENT_DIR))
BASE_DIR = os.path.abspath(os.path.join(CURRENT_DIR, "..", "..", "..")) # yolov8_training
print("Base directory: {}".format(BASE_DIR))
DATASETS_DIR = os.path.join(BASE_DIR, "datasets") # yolov8_training/datasets
print("Dataset directory: {}".format(DATASETS_DIR))

os.makedirs(DATASETS_DIR, exist_ok=True)

RAW_CLASSES_DIR = os.path.join(DATASETS_DIR, "raw_all_classes") # yolov8_training/datasets/raw_all_classes
print("Raw classes directory: {}".format(RAW_CLASSES_DIR))
COIN_DIR = os.path.join(DATASETS_DIR, "coin") # yolov8_training/datasets/coin
print("Coin directory: {}".format(COIN_DIR))

os.makedirs(COIN_DIR, exist_ok=True)

for split in ["train", "val", "test"]:
    for sub in ["images", "labels"]:
        os.makedirs(os.path.join(COIN_DIR, split, sub), exist_ok=True)

with open(os.path.join(RAW_CLASSES_DIR, "data.yaml"), "r") as f:
    raw_data = yaml.safe_load(f)

coin_names = {i: name for i, name in enumerate(raw_data["names"]) if "coin" in name.lower()}
print(f"ðŸŽ¯ Coin classes found: {list(coin_names.values())}")
def filter_coin_labels(label_path):
    lines = []
    with open(label_path, "r") as f:
        for line in f:
            cls, *rest = line.strip().split()
            cls = int(cls)
            name = raw_data["names"][cls]
            if "coin" in name.lower():
                new_cls = list(coin_names.keys()).index(cls)
                lines.append(f"{new_cls} {' '.join(rest)}\n")
    return lines

for split in ["train", "valid", "test"]:
    raw_images = os.path.join(RAW_CLASSES_DIR, split, "images")
    raw_labels = os.path.join(RAW_CLASSES_DIR, split, "labels")
    out_split = "val" if split=="valid" else split
    out_images = os.path.join(COIN_DIR, out_split, "images")
    out_labels = os.path.join(COIN_DIR, out_split, "labels")
    for fname in os.listdir(raw_labels):
        label_path = os.path.join(raw_labels, fname)
        mapped_lines = filter_coin_labels(label_path)
        if not mapped_lines: continue
        shutil.copy(os.path.join(raw_images, fname.replace(".txt", ".jpg")), out_images)
        with open(os.path.join(out_labels, fname), "w") as f:
            f.writelines(mapped_lines)

coin_yaml = {
    "train": "datasets/coin/train/images",
    "val": "datasets/coin/val/images",
    "test": "datasets/coin/test/images",
    "nc": len(coin_names),
    "names": {i:name for i,name in enumerate(coin_names.values())}
}

with open(os.path.join(COIN_DIR, "data.yaml"), "w") as f:
    yaml.dump(coin_yaml, f)

print("âœ… Coin dataset generated.")

Current directory: D:\CurrencyDetectorApp\yolov8_training\notebooks\generate_datasets\generate_datasets
Base directory: D:\CurrencyDetectorApp\yolov8_training
Dataset directory: D:\CurrencyDetectorApp\yolov8_training\datasets
Raw classes directory: D:\CurrencyDetectorApp\yolov8_training\datasets\raw_all_classes
Coin directory: D:\CurrencyDetectorApp\yolov8_training\datasets\coin
ðŸŽ¯ Coin classes found: ['10_coin', '1_coin', '2_coin', '50_coin', '5_coin']
âœ… Coin dataset generated.
