In [None]:
import os
import json
import shutil
import random
from collections import defaultdict

# --- Config ---
# Root folder that holds the label file, the raw images and the sorted output.
BASE_DIR = "images"
# JSON label file; each entry is read below for its "poseId" and "s3Filename".
LABELS_PATH = os.path.join(BASE_DIR, "db_labels.json")
# Input folder the labeled images are copied from.
UNSORTED_DIR = os.path.join(BASE_DIR, "unsorted")
# Output tree; it is wiped and rebuilt on every run.
SORTED_DIR = os.path.join(BASE_DIR, "sorted")
TRAIN_DIR, VAL_DIR = (
    os.path.join(SORTED_DIR, "training"),
    os.path.join(SORTED_DIR, "validation"),
)
# Fraction of each pose's training images moved to validation (20%).
VAL_SPLIT = 0.2

# --- Create Directory Helpers ---
def ensure_dir(path):
    """Create the directory ``path``, including missing parents, if needed.

    Does nothing when the directory already exists. ``exist_ok=True`` lets
    ``os.makedirs`` handle the "already there" case itself, which avoids the
    race between a separate existence check and the creation.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

# --- Step 0: Clear out sorted directory ---
# Drop any previous output tree, then recreate the two empty split folders.
if os.path.exists(SORTED_DIR):
    shutil.rmtree(SORTED_DIR)
for split_dir in (TRAIN_DIR, VAL_DIR):
    ensure_dir(split_dir)

# --- Step 1: Read Labels ---
# JSON text is UTF-8 by specification; decode it explicitly so the result
# does not depend on the platform's default locale encoding.
with open(LABELS_PATH, "r", encoding="utf-8") as f:
    labels = json.load(f)

# Organize by poseId: map each poseId to the list of its s3Filename values.
pose_to_filenames = defaultdict(list)
for entry in labels:
    pose_to_filenames[entry["poseId"]].append(entry["s3Filename"])

# --- Step 2: Sort into training ---
# Copy every labeled image into training/<poseId>/. Labels whose image is not
# a regular file in UNSORTED_DIR are skipped, but counted and reported so an
# incomplete image folder or stale label file does not go unnoticed.
missing_files = []
for poseId, files in pose_to_filenames.items():
    train_subdir = os.path.join(TRAIN_DIR, str(poseId))
    ensure_dir(train_subdir)

    for filename in files:
        src = os.path.join(UNSORTED_DIR, filename)
        dst = os.path.join(train_subdir, filename)
        # isfile rather than exists: a name that resolves to a directory
        # (e.g. an empty filename) cannot be copied by copy2 and would crash.
        if not os.path.isfile(src):
            missing_files.append(filename)
            continue
        shutil.copy2(src, dst)

if missing_files:
    print(f"⚠️ Skipped {len(missing_files)} labeled image(s) not found in {UNSORTED_DIR}.")

# --- Step 3: Create validation split ---
# For each pose, move a random VAL_SPLIT share of its training images into
# the matching validation folder.
for pose_id in pose_to_filenames:
    pose_name = str(pose_id)
    source_dir = os.path.join(TRAIN_DIR, pose_name)
    target_dir = os.path.join(VAL_DIR, pose_name)
    ensure_dir(target_dir)

    candidates = os.listdir(source_dir)
    # int() truncates, so a small folder may contribute no validation images.
    holdout_count = int(len(candidates) * VAL_SPLIT)

    for picked in random.sample(candidates, holdout_count):
        shutil.move(
            os.path.join(source_dir, picked),
            os.path.join(target_dir, picked),
        )

print("✅ Images sorted into training and validation sets.")
