In [2]:
import os
import shutil
import random
from tqdm import tqdm

def split_dataset(input_dir, output_dir, train_ratio=0.7, test_ratio=0.2, val_ratio=0.1, seed=None):
    """Copy a class-per-folder image dataset into train/test/val subsets.

    Every immediate sub-directory of ``input_dir`` is treated as one class.
    Files in it whose name ends with ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) are shuffled and copied to
    ``output_dir/<split>/<class_name>/``.

    Args:
        input_dir: Directory containing one sub-directory per class.
        output_dir: Destination root; ``train``, ``test`` and ``val`` are
            created beneath it if missing.
        train_ratio: Fraction of each class copied to ``train``.
        test_ratio: Fraction of each class copied to ``test``.
        val_ratio: Fraction of each class intended for ``val``.
        seed: Optional seed for the shuffle. When given, the same input
            produces the same split on every run. When ``None`` the
            module-level ``random`` generator is used.

    Raises:
        AssertionError: If the three ratios do not sum to 1 (within 1e-5).

    Notes:
        * Train and test sizes are truncated to whole images; ``val`` gets
          whatever is left, so it can be slightly larger than ``val_ratio``.
        * Existing files in ``output_dir`` are never removed. Re-running
          with a different shuffle into the same destination can therefore
          leave one image in more than one split.
    """
    assert abs(train_ratio + test_ratio + val_ratio - 1.0) < 1e-5, "Ratios must sum to 1"

    print(f"📂 Reading from: {input_dir}")
    print(f"💾 Saving to: {output_dir}")
    os.makedirs(output_dir, exist_ok=True)

    for split in ('train', 'test', 'val'):
        os.makedirs(os.path.join(output_dir, split), exist_ok=True)

    # A private generator keeps a seeded run from disturbing global random state.
    shuffle = random.Random(seed).shuffle if seed is not None else random.shuffle

    # os.listdir order is arbitrary; sorting makes a seeded shuffle reproducible.
    class_folders = sorted(
        f for f in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, f))
    )

    for class_name in class_folders:
        input_class_path = os.path.join(input_dir, class_name)

        images = sorted(
            f for f in os.listdir(input_class_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        if not images:
            print(f"⚠️ No images found in class '{class_name}', skipping.")
            continue

        shuffle(images)
        total = len(images)
        train_end = int(train_ratio * total)
        test_end = train_end + int(test_ratio * total)

        splits = {
            'train': images[:train_end],
            'test': images[train_end:test_end],
            'val': images[test_end:],  # remainder after truncation
        }

        for split, split_images in splits.items():
            split_class_path = os.path.join(output_dir, split, class_name)
            os.makedirs(split_class_path, exist_ok=True)

            for img in tqdm(split_images, desc=f"{split.upper()} | {class_name}", leave=False):
                src = os.path.join(input_class_path, img)
                dst = os.path.join(split_class_path, img)
                shutil.copy(src, dst)

        print(f"✅ Done: '{class_name}' → Train={train_end}, Test={test_end - train_end}, Val={total - test_end}")

    # Report the ratios actually requested instead of a hard-coded 70/20/10.
    print(
        f"🎉 Split complete: {train_ratio * 100:g}% Train, "
        f"{test_ratio * 100:g}% Test, {val_ratio * 100:g}% Val"
    )

if __name__ == "__main__":
    # Source dataset root: one sub-folder per class.
    input_path = "/home/hassaan-ahmad/augmented dataset"
    # Destination root that receives the train/, test/ and val/ folders.
    output_path = "/home/hassaan-ahmad/aug_data_train_test"
    # Run with the function's default split ratios.
    split_dataset(input_dir=input_path, output_dir=output_path)


📂 Reading from: /home/hassaan-ahmad/augmented dataset
💾 Saving to: /home/hassaan-ahmad/aug_data_train_test


                                                                                                                                                                                        

✅ Done: 'Healthy' → Train=2412, Test=689, Val=345


                                                                                                                                                                                        

✅ Done: 'Leaf Rust' → Train=1437, Test=410, Val=207


                                                                                                                                                                                        

✅ Done: 'Yellow Rust' → Train=1952, Test=558, Val=280


                                                                                                                                                                                        

✅ Done: 'Loose Smut' → Train=1297, Test=370, Val=187


                                                                                                                                                                                        

✅ Done: 'Septoria' → Train=624, Test=178, Val=90


                                                                                                                                                                                        

✅ Done: 'Brown Rust' → Train=1758, Test=502, Val=252


                                                                                                                                                                                        

✅ Done: 'Stripe Rust' → Train=291, Test=83, Val=42
🎉 Split complete: 70% Train, 20% Test, 10% Val


