In [2]:
import os

DATASET_DIR = "preprocessed_images"   # change if needed
PAD_WIDTH = 3                         # 3 digits → supports up to 999 classes


def plan_box_renames(names, width=PAD_WIDTH):
    """Map every ``box_<n>`` / ``tmp_box_<n>`` name to its zero-padded form.

    This is a pure planning step: nothing on disk is touched, so naming
    problems surface before the first rename happens.

    Args:
        names: Candidate folder names.
        width: Minimum number of digits in the new name.

    Returns:
        ``(plan, skipped)``. ``plan`` is a list of ``(old_name, new_name)``
        pairs ordered by old name. ``skipped`` lists prefixed names whose
        last ``_``-separated part is not an integer.

    Raises:
        FileExistsError: If two names map to the same target
            (e.g. ``box_1`` and ``box_01``).
    """
    plan, skipped, claimed = [], [], {}
    for name in sorted(set(names)):
        # "tmp_box_" is accepted as well: the original two-pass version of
        # this cell converted such folders, e.g. after an interrupted run.
        if not name.startswith(("box_", "tmp_box_")):
            continue
        try:
            num = int(name.split("_")[-1])
        except ValueError:
            skipped.append(name)
            continue
        new_name = f"{num:0{width}d}"
        if new_name in claimed:
            raise FileExistsError(
                f"{claimed[new_name]!r} and {name!r} both map to {new_name!r}"
            )
        claimed[new_name] = name
        plan.append((name, new_name))
    return plan, skipped


def rename_box_folders(dataset_dir, width=PAD_WIDTH):
    """Rename ``box_<n>`` sub-folders of *dataset_dir* to zero-padded numbers.

    Only directories are considered. Every target is validated before any
    rename is performed, so a failure never leaves the dataset half-renamed.
    Source names start with ``box_`` and targets are numeric, so they cannot
    collide with each other and no temporary-name pass is needed.

    Args:
        dataset_dir: Directory containing the class folders.
        width: Minimum number of digits in the new folder name.

    Returns:
        The list of ``(old_name, new_name)`` renames that were performed.

    Raises:
        FileExistsError: If two folders map to the same target, or a target
            name already exists in *dataset_dir*.
    """
    folders = [
        entry for entry in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, entry))
    ]
    plan, skipped = plan_box_renames(folders, width)

    for name in skipped:
        print(f"⚠️ Skipping {name}, no numeric suffix")

    # Validate every target first; only then start touching the disk.
    for old_name, new_name in plan:
        if os.path.lexists(os.path.join(dataset_dir, new_name)):
            raise FileExistsError(
                f"cannot rename {old_name!r}: {new_name!r} already exists"
            )

    for old_name, new_name in plan:
        os.rename(
            os.path.join(dataset_dir, old_name),
            os.path.join(dataset_dir, new_name),
        )
    return plan


# In a Jupyter kernel __name__ is "__main__", so this runs when the cell is
# executed; the guard only keeps a plain import of this code side-effect free.
if __name__ == "__main__":
    rename_box_folders(DATASET_DIR)
    print("Folder renaming complete.")


Folder renaming complete.


In [3]:
import os
import shutil
import random
import math

SRC_DIR = "preprocessed_images"
DEST_DIR = "dataset"

IMG_EXTS = (".png", ".jpg", ".jpeg", ".bmp")
VAL_FRACTION = 0.2   # share of each class held out for validation
SEED = 42            # fixed so Restart & Run All reproduces the same split


def split_train_val(images, val_fraction=VAL_FRACTION, rng=None):
    """Split one class's file names into train and validation lists.

    The names are sorted before shuffling so the result depends only on the
    set of names and the RNG state, not on the order ``os.listdir`` returned.

    Args:
        images: File names belonging to a single class (at least two).
        val_fraction: Fraction of the images assigned to validation; the
            count is rounded up and clamped so both parts get >= 1 image.
        rng: ``random.Random`` instance to shuffle with; the module-level
            ``random`` functions are used when omitted.

    Returns:
        ``(train_names, val_names)``.

    Raises:
        ValueError: If fewer than two names are given.
    """
    ordered = sorted(images)
    n = len(ordered)
    if n < 2:
        raise ValueError(f"need at least 2 images to split, got {n}")

    shuffler = rng if rng is not None else random
    shuffler.shuffle(ordered)

    val_count = max(1, math.ceil(val_fraction * n))
    val_count = min(val_count, n - 1)   # always leave one image for training
    cut = n - val_count
    return ordered[:cut], ordered[cut:]


def build_split(src_dir, dest_dir, val_fraction=VAL_FRACTION, seed=SEED):
    """Copy every class folder of *src_dir* into ``train/`` and ``val/`` parts.

    For each sub-directory of *src_dir*, the image files (by ``IMG_EXTS``)
    are split per class and copied to ``<dest_dir>/train/<label>`` and
    ``<dest_dir>/val/<label>``. Existing output folders for a label are
    removed first; otherwise a re-run would pile a new split on top of the
    old one and the same image could end up in both train and val.

    Args:
        src_dir: Directory with one sub-folder per class.
        dest_dir: Output root; ``train`` and ``val`` are created inside it.
        val_fraction: Fraction of each class held out for validation.
        seed: Seed for the shuffle, making the split reproducible.

    Returns:
        Dict mapping each copied label to ``(n_train, n_val)``. Classes with
        fewer than two images are skipped and not included.

    Raises:
        ValueError: If *src_dir* and *dest_dir* are the same directory.
    """
    if os.path.abspath(src_dir) == os.path.abspath(dest_dir):
        raise ValueError("src_dir and dest_dir must be different directories")

    rng = random.Random(seed)
    train_root = os.path.join(dest_dir, "train")
    val_root = os.path.join(dest_dir, "val")
    os.makedirs(train_root, exist_ok=True)
    os.makedirs(val_root, exist_ok=True)

    counts = {}
    for label in sorted(os.listdir(src_dir)):
        label_path = os.path.join(src_dir, label)
        if not os.path.isdir(label_path):
            continue

        train_dir = os.path.join(train_root, label)
        val_dir = os.path.join(val_root, label)

        # Drop output of earlier runs so train and val can never overlap.
        for stale_dir in (train_dir, val_dir):
            if os.path.isdir(stale_dir):
                shutil.rmtree(stale_dir)

        images = [
            f for f in os.listdir(label_path)
            if os.path.isfile(os.path.join(label_path, f))
            and f.lower().endswith(IMG_EXTS)
        ]

        n = len(images)
        if n < 2:
            print(f"⚠️ Skipping {label}, only {n} image")
            continue

        train_imgs, val_imgs = split_train_val(images, val_fraction, rng)

        for target_dir, names in ((train_dir, train_imgs), (val_dir, val_imgs)):
            os.makedirs(target_dir, exist_ok=True)
            for img in names:
                shutil.copy(
                    os.path.join(label_path, img),
                    os.path.join(target_dir, img),
                )

        counts[label] = (len(train_imgs), len(val_imgs))

    return counts


# In a Jupyter kernel __name__ is "__main__", so this runs when the cell is
# executed; the guard only keeps a plain import of this code side-effect free.
if __name__ == "__main__":
    build_split(SRC_DIR, DEST_DIR)
    print("✅ Robust stratified split done (≥1 train & val per class)")


⚠️ Skipping 013, only 7 images
⚠️ Skipping 014, only 7 images
⚠️ Skipping 043, only 7 images
⚠️ Skipping 086, only 7 images
⚠️ Skipping 097, only 7 images
⚠️ Skipping 106, only 7 images
⚠️ Skipping 117, only 7 images
⚠️ Skipping 118, only 7 images
⚠️ Skipping 119, only 7 images
⚠️ Skipping 129, only 7 images
⚠️ Skipping 130, only 5 images
⚠️ Skipping 142, only 7 images
⚠️ Skipping 153, only 7 images
⚠️ Skipping 154, only 7 images
⚠️ Skipping 160, only 7 images
⚠️ Skipping 164, only 7 images
⚠️ Skipping 165, only 7 images
⚠️ Skipping 166, only 5 images
⚠️ Skipping 178, only 5 images
⚠️ Skipping 185, only 7 images
⚠️ Skipping 190, only 6 images
⚠️ Skipping 199, only 7 images
⚠️ Skipping 201, only 6 images
⚠️ Skipping 202, only 5 images
⚠️ Skipping 213, only 5 images
⚠️ Skipping 214, only 6 images
⚠️ Skipping 216, only 6 images
⚠️ Skipping 217, only 7 images
⚠️ Skipping 218, only 7 images
⚠️ Skipping 219, only 7 images
⚠️ Skipping 220, only 7 images
⚠️ Skipping 228, only 6 images
⚠️ Skipp