In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

In [5]:
# ========= 🛠️ CONFIG =========
# Paths are assembled with os.path.join instead of being typed with literal
# backslashes: in a normal (non-raw) string, sequences such as "\d", "\p" and
# "\c" are invalid escapes, and a backslash-separated path only resolves on
# Windows. os.path.join yields the same string on Windows and a valid path
# elsewhere.
image_folder = os.path.join('..', 'data', 'processed', 'cropped_lids')  # change if needed
save_folder = os.path.join('..', 'data', 'processed', 'Dataset_365')  # output directory for the saved splits
img_size = (128, 128)  # resize target handed to cv2.resize

In [7]:
# ========= 📥 Load & Label =========
# Builds two parallel lists: `images` collects the resized image arrays and
# `labels` collects the matching class ids (0 = intact, 1 = damaged).
images = []
labels = []

# os.listdir returns entries in arbitrary order. Sorting fixes the sample
# order so that the seeded train/val/test split made later in the notebook
# is reproducible from one run or machine to the next.
for fname in sorted(os.listdir(image_folder)):
    # Only consider files with an image extension (case-insensitive).
    if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    full_path = os.path.join(image_folder, fname)

    # Labeling: 'intact' => 0, 'damaged' => 1
    # NOTE: the substring match is case-sensitive, and 'intact' takes
    # precedence when a name contains both words.
    if 'intact' in fname:
        label = 0
    elif 'damaged' in fname:
        label = 1
    else:
        continue  # skip unknown label types

    img = cv2.imread(full_path, cv2.IMREAD_COLOR)
    if img is None:
        # Unreadable file: report it and move on instead of aborting the load.
        print(f"⚠️ Could not read {fname}, skipping.")
        continue

    # Resize to common shape so all images can be stacked into one array
    img = cv2.resize(img, img_size)

    images.append(img)
    labels.append(label)

print(f"✅ Loaded {len(images)} images.")

✅ Loaded 1859 images.


In [9]:
# ========= 🧮 Convert to NumPy =========
# Stack the loaded images into a single float32 array and divide by 255
# (normalize to [0, 1]); the labels become a uint8 vector in the same order.
X = np.divide(np.asarray(images, dtype=np.float32), 255.0)
y = np.asarray(labels, dtype=np.uint8)

In [11]:
# ========= ✂️ Split =========
# Two-stage stratified split with a fixed seed:
#   1. hold out 30% of the samples as a temporary pool (70% stays for training);
#   2. cut that pool in half to obtain the validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

In [13]:
# Print the array shape of the train, validation and test feature sets.
print(f"📊 Shapes: Train={X_train.shape}, Val={X_val.shape}, Test={X_test.shape}")

📊 Shapes: Train=(1301, 128, 128, 3), Val=(279, 128, 128, 3), Test=(279, 128, 128, 3)


In [15]:
# ========= 💾 Save =========
# Write each split to its own compressed .npz archive inside save_folder,
# with the arrays stored under the keys "X" and "y".
os.makedirs(save_folder, exist_ok=True)

# os.path.join keeps the path separator consistent with save_folder rather
# than appending a hard-coded "/" to it; the file names are unchanged
# (jarlid_train.npz, jarlid_val.npz, jarlid_test.npz).
for split_name, split_X, split_y in (
    ("train", X_train, y_train),
    ("val", X_val, y_val),
    ("test", X_test, y_test),
):
    np.savez_compressed(
        os.path.join(save_folder, f"jarlid_{split_name}.npz"),
        X=split_X,
        y=split_y,
    )

print("🎉 All datasets saved to:", save_folder)

🎉 All datasets saved to: ..\data\processed\Dataset_365
