In [1]:
# Husayn El Sharif
# Create example_images from test dataset

In [2]:
import os
import shutil
import pandas as pd
from pathlib import Path

# Reproducibility
RANDOM_SEED = 42
N_PER_CLASS = 3

# Paths
CSV_PATH = "test_results_with_predictions_tensorflow_20260104-134551.csv"
SOURCE_PREFIX = "images"
TARGET_PREFIX = "example_images"


In [3]:
df = pd.read_csv(CSV_PATH)

required_cols = {"True_Label", "Image_Path"}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {missing}")

df.head()


Unnamed: 0,True_Label,Prediction_Label,Image_Path
0,cataract,cataract,images/cataract/111_9826667.jpg
1,cataract,cataract,images/cataract/118_6296686.jpg
2,cataract,cataract,images/cataract/130_3561448.jpg
3,cataract,cataract,images/cataract/130_7837321.jpg
4,cataract,normal,images/cataract/1415_left.jpg


In [4]:
sampled_df = (
    df
    .groupby("True_Label", group_keys=False)
    .apply(lambda x: x.sample(n=min(N_PER_CLASS, len(x)), random_state=RANDOM_SEED))
    .reset_index(drop=True)
)

sampled_df["True_Label"].value_counts()


  .apply(lambda x: x.sample(n=min(N_PER_CLASS, len(x)), random_state=RANDOM_SEED))


True_Label
cataract                3
diabetic_retinopathy    3
glaucoma                3
normal                  3
Name: count, dtype: int64

In [5]:
copied_files = []

for _, row in sampled_df.iterrows():
    src_path = Path(row["Image_Path"])

    if not src_path.exists():
        print(f"⚠️ File not found, skipping: {src_path}")
        continue

    # Replace "images/..." → "example_images/..."
    try:
        relative_path = src_path.relative_to(SOURCE_PREFIX)
    except ValueError:
        print(f"⚠️ Path does not start with '{SOURCE_PREFIX}', skipping: {src_path}")
        continue

    dst_path = Path(TARGET_PREFIX) / relative_path

    # Create parent directories
    dst_path.parent.mkdir(parents=True, exist_ok=True)

    # Copy file
    shutil.copy2(src_path, dst_path)
    copied_files.append(dst_path)

print(f"\n✅ Copied {len(copied_files)} images to '{TARGET_PREFIX}/'")



✅ Copied 12 images to 'example_images/'


In [6]:
for path in copied_files:
    print(path)


example_images/cataract/2228_right.jpg
example_images/cataract/2239_left.jpg
example_images/cataract/1415_left.jpg
example_images/diabetic_retinopathy/10511_left.jpg
example_images/diabetic_retinopathy/10891_right.jpg
example_images/diabetic_retinopathy/10891_left.jpg
example_images/glaucoma/395_9075896.jpg
example_images/glaucoma/387_7675254.jpg
example_images/glaucoma/377_2444638.jpg
example_images/normal/2860_left.jpg
example_images/normal/1060_left.jpg
example_images/normal/2712_right.jpg
