In [None]:
# Author: Husayn El Sharif
# Create the example_images/ folder by copying a few sampled images per class from the test dataset.

In [None]:
import os
import shutil
import pandas as pd
from pathlib import Path

# --- Sampling settings ---
N_PER_CLASS = 3    # upper bound on rows sampled from each True_Label class
RANDOM_SEED = 42   # passed as random_state so the sample is reproducible

# --- File locations ---
# CSV that is loaded with pandas; it must provide True_Label and Image_Path columns.
CSV_PATH = "test_results_with_predictions_tensorflow_20260104-134551.csv"
# Leading directory of each image path, and the directory that replaces it on copy.
SOURCE_PREFIX, TARGET_PREFIX = "images", "example_images"


In [None]:
# Read the results table from disk.
df = pd.read_csv(CSV_PATH)

# Fail fast if a column that the following cells rely on is absent.
required_cols = {"True_Label", "Image_Path"}
missing = required_cols - set(df.columns)
if len(missing) > 0:
    raise ValueError(f"Missing required columns: {missing}")

# Show the first rows as the cell output.
df.head()


In [None]:
# Draw up to N_PER_CLASS rows from every True_Label class, using the same
# RANDOM_SEED for each class so the selection is reproducible.
#
# Each class is sampled in an explicit loop instead of groupby(...).apply(...):
# letting apply() operate on the grouping column is deprecated in pandas 2.2,
# and pandas 3.0 no longer passes that column to the function, which would
# leave the result without "True_Label" and break the count below.
# min(...) keeps classes with fewer than N_PER_CLASS rows from raising.
per_class_samples = [
    class_rows.sample(n=min(N_PER_CLASS, len(class_rows)), random_state=RANDOM_SEED)
    for _, class_rows in df.groupby("True_Label")
]

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns when there is no class to sample (e.g. a CSV with no rows).
sampled_df = (
    pd.concat(per_class_samples, ignore_index=True)
    if per_class_samples
    else df.iloc[0:0].copy()
)

# Number of sampled rows per class, shown as the cell output.
sampled_df["True_Label"].value_counts()


In [None]:
# Copy every sampled image from SOURCE_PREFIX/... to the same relative location
# under TARGET_PREFIX/..., skipping (with a warning) any row that cannot be
# copied. The destination paths that were written are collected in copied_files.
copied_files = []

# Only the path column is needed, so iterate over it directly instead of
# building a full row Series per record with iterrows().
for raw_path in sampled_df["Image_Path"]:
    # Empty CSV cells are read as NaN; Path(NaN) raises TypeError and would
    # abort the whole loop, so treat such rows like any other unusable entry.
    if pd.isna(raw_path):
        print("⚠️ Missing Image_Path value, skipping row")
        continue

    src_path = Path(raw_path)

    # is_file() also rejects directories, which shutil.copy2 cannot copy.
    if not src_path.is_file():
        print(f"⚠️ File not found, skipping: {src_path}")
        continue

    # Replace "images/..." → "example_images/..."
    try:
        relative_path = src_path.relative_to(SOURCE_PREFIX)
    except ValueError:
        # relative_to() raises ValueError when src_path is not under SOURCE_PREFIX.
        print(f"⚠️ Path does not start with '{SOURCE_PREFIX}', skipping: {src_path}")
        continue

    dst_path = Path(TARGET_PREFIX) / relative_path

    # Create parent directories so the sub-folder layout is mirrored.
    dst_path.parent.mkdir(parents=True, exist_ok=True)

    # Copy the file (copy2 also attempts to preserve file metadata).
    shutil.copy2(src_path, dst_path)
    copied_files.append(dst_path)

print(f"\n✅ Copied {len(copied_files)} images to '{TARGET_PREFIX}/'")


In [None]:
# List each copied destination path on its own line.
# The guard keeps an empty list from printing a stray blank line.
if copied_files:
    print("\n".join(str(copied) for copied in copied_files))
