In [1]:
import os
import shutil
import random

# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
# Folder whose immediate sub-directories are read as the classes to split.
SOURCE_DIR = r"C:\Harsh Works\code\American Sign Language\Safe Side\Data"
# Folder that receives the train / val / test copies.
DEST_DIR = r"C:\Harsh Works\code\American Sign Language\ASL_Split"

# Share of each class assigned to train, val and test respectively.
# Adjustable, but keep the values sensible.
TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT = 0.7, 0.15, 0.15

# Seed the module-level random generator once, before any shuffling.
random.seed(42)

# ===========================
# CREATE OUTPUT FOLDERS
# ===========================
# Every sub-directory of SOURCE_DIR is treated as one class.
# os.listdir() returns entries in arbitrary order, so the names are sorted:
# the per-class shuffles all draw from the same seeded generator, which means
# the class order has to be stable for the seed to reproduce the same split.
classes = sorted(
    d for d in os.listdir(SOURCE_DIR)
    if os.path.isdir(os.path.join(SOURCE_DIR, d))
)

# Create <DEST_DIR>/<split>/<class> up front; exist_ok makes re-runs harmless.
for split in ["train", "val", "test"]:
    for cls in classes:
        os.makedirs(os.path.join(DEST_DIR, split, cls), exist_ok=True)

# ===========================
# SPLIT LOGIC (ensure at least 1 per class per split if possible)
# ===========================
def _split_counts(total, train_frac, val_frac, test_frac):
    """Return ``(train, val, test)`` sizes that add up to exactly *total*.

    Each size starts at ``int(total * fraction)`` but never below 1. Any
    overshoot is trimmed one item at a time from the largest split (ties go
    to train, then val) without taking a split below 1. Items left
    unassigned after rounding down are added to train.

    When there are fewer items than splits, not every split can receive one;
    the items are then handed out in priority order train, val, test, so the
    training set is never the one left empty.
    """
    counts = [max(1, int(total * frac))
              for frac in (train_frac, val_frac, test_frac)]

    # Trim overshoot from the largest split while it still has more than one.
    while sum(counts) > total:
        largest = counts.index(max(counts))  # first maximum wins ties
        if counts[largest] <= 1:
            break
        counts[largest] -= 1

    # Still overshooting means every split is at 1 and total < 3.
    if sum(counts) > total:
        counts = [1] * total + [0] * (3 - total)

    # Rounding down can leave items unassigned; give them to train.
    counts[0] += total - sum(counts)
    return tuple(counts)


for cls in classes:
    class_path = os.path.join(SOURCE_DIR, cls)
    # Sort before shuffling: os.listdir() order is arbitrary, and a seeded
    # shuffle is only reproducible when it starts from a fixed order.
    images = sorted(
        f for f in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, f))
    )

    if not images:
        print(f"[WARN] No images found for class '{cls}', skipping.")
        continue

    random.shuffle(images)
    train_count, val_count, test_count = _split_counts(
        len(images), TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT
    )

    # Consecutive, non-overlapping slices of the shuffled list.
    val_end = train_count + val_count
    train_imgs = images[:train_count]
    val_imgs = images[train_count:val_end]
    test_imgs = images[val_end:val_end + test_count]

    # Copy each subset into <DEST_DIR>/<split>/<class>/, keeping file names.
    for split_name, split_imgs in (
        ("train", train_imgs),
        ("val", val_imgs),
        ("test", test_imgs),
    ):
        for img in split_imgs:
            src = os.path.join(class_path, img)
            dst = os.path.join(DEST_DIR, split_name, cls, img)
            shutil.copy(src, dst)

    print(f"{cls} → Train: {len(train_imgs)} | Val: {len(val_imgs)} | Test: {len(test_imgs)}")

print("\n✅ Dataset successfully split into train / val / test folders!")
print("📂 Saved at:", DEST_DIR)


Hello → Train: 435 | Val: 92 | Test: 92
I Love You → Train: 408 | Val: 87 | Test: 87
Okay → Train: 340 | Val: 72 | Test: 72
Please → Train: 617 | Val: 131 | Test: 131
Thank you → Train: 346 | Val: 73 | Test: 73
Yes → Train: 370 | Val: 78 | Test: 78

✅ Dataset successfully split into train / val / test folders!
📂 Saved at: C:\Harsh Works\code\American Sign Language\ASL_Split
