In [4]:
# --- Notebook: 03_Export_ImagesNPY_for_CNN.ipynb ---

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

# 0) Config: target size of every exported image.
#    PIL's resize() takes (width, height); the value is square here, so order is moot.
#    Per the original author's note, the CNN pipeline resizes to 256 and then crops,
#    so this size is meant to match it (that pipeline is not visible in this notebook).
FINAL_SIZE = (256, 256)

# 1) Load the artifacts written by earlier steps: file paths, encoded labels,
#    the three split index arrays, and the class-name lookup.
#    allow_pickle=True lets NumPy unpickle object arrays; only use it on trusted files.
paths = np.load("filepaths.npy", allow_pickle=True)
labels_encoded = np.load("labels_encoded.npy", allow_pickle=True)
train_idx, val_idx, test_idx = (
    np.load(split_file)
    for split_file in ("split_train.npy", "split_val.npy", "split_test.npy")
)
class_names = np.load("class_names.npy", allow_pickle=True)

# 2) Build the full array (ALL images) so the CNN script can stay unchanged
#    (per the original note, it loads images.npy and labels.npy, then does its own split).
# PIL sizes are (width, height); the packed array layout is (N, height, width, 3).
width, height = FINAL_SIZE
# Preallocate the output once instead of collecting per-image arrays in a list and
# stacking them afterwards, which briefly holds two copies of the whole dataset.
images = np.empty((len(paths), height, width, 3), dtype=np.uint8)
for i, p in enumerate(tqdm(paths, desc="Resizing & packing images.npy")):
    # The context manager closes the underlying file handle as soon as the pixels
    # have been converted, rather than leaving that to garbage collection.
    with Image.open(p) as im:
        resized = im.convert("RGB").resize(FINAL_SIZE, resample=Image.BILINEAR)
    images[i] = np.asarray(resized, dtype=np.uint8)

# 3) Persist the arrays under the file names the CNN script is expected to read.
outputs = (
    ("images.npy", images),            # packed pixel data
    ("labels.npy", labels_encoded),    # encoded labels, saved exactly as loaded
    ("class_names.npy", class_names),  # written back unchanged, for later reporting
)
for out_name, arr in outputs:
    np.save(out_name, arr)

print("Saved images.npy (uint8), labels.npy (encoded ints), class_names.npy")

# 4) OPTIONAL: verify split class balance (sanity)
def counts(y, idx, n_classes=None):
    """Count how many samples of each class fall inside one split.

    Parameters
    ----------
    y : array-like of non-negative ints
        Encoded class label for every sample in the dataset.
    idx : array-like of ints
        Positions in ``y`` that belong to the split being inspected.
    n_classes : int, optional
        Minimum length of the result, so that classes absent from the split
        still appear with a count of 0. When omitted, falls back to
        ``len(class_names)`` from the notebook namespace.

    Returns
    -------
    numpy.ndarray
        ``out[k]`` is the number of selected labels equal to ``k``.
    """
    if n_classes is None:
        # Backward-compatible default: use the notebook-level class list.
        n_classes = len(class_names)
    # np.asarray lets plain Python lists be indexed with an index array too.
    return np.bincount(np.asarray(y)[idx], minlength=n_classes)
# Print the per-class sample counts of each split, in train / val / test order.
for header, split_idx in (
    ("Train counts:", train_idx),
    ("Val counts:", val_idx),
    ("Test counts:", test_idx),
):
    print(header, counts(labels_encoded, split_idx))


Resizing & packing images.npy: 100%|██████████| 4752/4752 [00:16<00:00, 295.77it/s]


Saved images.npy (uint8), labels.npy (encoded ints), class_names.npy
Train counts: [323 287 294 553 347 350 645 222 305]
Val counts: [ 69  62  63 118  74  75 138  48  66]
Test counts: [ 69  62  63 119  74  75 138  48  65]
