In [7]:
import random
from pathlib import Path
from shutil import move


# Locate the directory whose parent is the project root: the folder holding
# this script when run as a file, or the current working directory when
# ``__file__`` is not defined (e.g. in an interactive/notebook session).
try:
    _anchor = Path(__file__).resolve().parent
except NameError:
    _anchor = Path.cwd()
PROJECT_ROOT = _anchor.parent

# Folder containing the images to be split; split sub-folders are created here.
BASE_DIR = PROJECT_ROOT / "farm_backgrounds"

# Seed for the shuffle that decides which image lands in which split.
SEED = 4896

# Fraction of the images assigned to each split.
SPLITS = {
    "train": 0.6,
    "val": 0.2,
    "test": 0.2,
}

# Lower-case file suffixes that count as images.
ALLOWED_EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
}

def main():
    """Shuffle the images in ``BASE_DIR`` and move them into split folders.

    Every regular file directly inside ``BASE_DIR`` whose suffix
    (case-insensitively) is in ``ALLOWED_EXTENSIONS`` is a candidate. The
    candidates are sorted by file name, shuffled with ``SEED``, and then
    moved (not copied) into ``BASE_DIR/train``, ``BASE_DIR/val`` and
    ``BASE_DIR/test``. The train and val sizes are the ``SPLITS`` fractions
    rounded down; the test split receives whatever remains.

    Progress is reported with ``print``. The function returns ``None`` and
    exits early, without touching any file, when ``BASE_DIR`` is missing or
    holds fewer than 10 matching images.
    """
    if not BASE_DIR.exists():
        print(f"Error: {BASE_DIR} does not exist.")
        return

    # iterdir() yields entries in arbitrary, filesystem-dependent order.
    # Sorting by name first makes the seeded shuffle below produce the same
    # split on every run and every machine.
    all_images = sorted(
        (
            p for p in BASE_DIR.iterdir()
            if p.is_file() and p.suffix.lower() in ALLOWED_EXTENSIONS
        ),
        key=lambda p: p.name,
    )

    total = len(all_images)
    if total < 10:
        print("Not enough images to split. Found:", total)
        return

    print(f"Found {total} images. Splitting using seed {SEED}...")

    # NOTE: this seeds the module-level generator, as the original code did.
    random.seed(SEED)
    random.shuffle(all_images)

    n_train = int(total * SPLITS["train"])
    n_val = int(total * SPLITS["val"])
    val_end = n_train + n_val

    # The test slice is open-ended so it absorbs any rounding remainder.
    split_map = {
        "train": all_images[:n_train],
        "val": all_images[n_train:val_end],
        "test": all_images[val_end:],
    }

    for split_name, files in split_map.items():
        split_dir = BASE_DIR / split_name
        split_dir.mkdir(exist_ok=True)
        print(f"→ {split_name.upper()}: {len(files)} images")
        for file_path in files:
            move(str(file_path), str(split_dir / file_path.name))

    print("Split complete.")

# Run the split only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Found 500 images. Splitting using seed 4896...
→ TRAIN: 300 images
→ VAL: 100 images
→ TEST: 100 images
✅ Split complete.
