In [32]:
# ------------------------------
# Cell 1: Imports and Path Setup
# ------------------------------
from pathlib import Path
import cv2
import numpy as np

# Root folder holding the processed Train / Val / Test splits.
BASE_DIR = Path("/home/ulixes/segmentation_cv/data_augmentation/data/processed")

# Per-split directories to verify. Every split has resized images; only the
# Train split has a mask directory configured (Val and Test use None, which
# disables mask checking for them).
paths_to_check = {
    split: {
        "images": BASE_DIR / split / "resized",
        "masks": (BASE_DIR / split / "resized_label") if split == "Train" else None,
    }
    for split in ("Train", "Val", "Test")
}


In [33]:
# ------------------------------
# Cell 2: Verification Function
# ------------------------------
def verify_resized_images(image_dir, mask_dir=None, expected_size=(512, 512)):
    """Verify the ``*.jpg`` images in a folder and, optionally, their masks.

    An image passes when it can be loaded and its ``(height, width)`` equals
    ``expected_size``. When ``mask_dir`` is given, the image additionally
    needs a mask named ``<image stem>.png`` in that folder which loads, has
    the same ``expected_size``, and whose sorted unique pixel values are
    exactly ``[0, 1, 255]`` or ``[0, 2, 255]``. Every failure is reported
    with ``print`` and the image is skipped; nothing is raised.

    Args:
        image_dir: Directory (``Path`` or string) searched non-recursively
            for ``*.jpg`` files.
        mask_dir: Directory (``Path`` or string) holding the ``.png`` masks,
            or ``None`` to skip all mask checks.
        expected_size: Required ``(height, width)`` for images and masks.

    Returns:
        tuple[int, int]: ``(total_images, passed_images)``.
    """
    image_dir = Path(image_dir)
    # A falsy mask_dir (None) disables mask checks; anything else becomes a Path.
    mask_dir = Path(mask_dir) if mask_dir else None
    # Normalise so that a list such as [512, 512] compares equal to shape[:2].
    expected_size = tuple(expected_size)
    # A mask's sorted unique values must match one of these sets exactly.
    allowed_value_sets = ([0, 1, 255], [0, 2, 255])

    image_paths = sorted(image_dir.glob("*.jpg"))
    total_images = len(image_paths)
    passed_images = 0

    for img_path in image_paths:
        # Load image and verify dimensions (cv2.imread returns None on failure).
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"❌ Failed to load image: {img_path}")
            continue

        if img.shape[:2] != expected_size:
            print(f"❌ Incorrect image shape {img.shape[:2]}: {img_path}")
            continue

        if mask_dir is not None:
            # Swap only the final extension. A plain str.replace(".jpg", ".png")
            # would also rewrite ".jpg" occurring earlier in the file name and
            # would miss extensions matched case-insensitively by glob.
            mask_path = mask_dir / f"{img_path.stem}.png"
            if not mask_path.exists():
                print(f"❌ Missing mask for: {img_path.name}")
                continue

            mask = cv2.imread(str(mask_path), cv2.IMREAD_UNCHANGED)
            if mask is None:
                print(f"❌ Failed to read mask: {mask_path}")
                continue

            # Multi-channel masks are judged by their first channel only.
            if mask.ndim == 3:
                mask = mask[:, :, 0]

            if mask.shape[:2] != expected_size:
                print(f"❌ Incorrect mask shape {mask.shape[:2]}: {mask_path}")
                continue

            unique_vals = np.unique(mask)
            if not any(np.array_equal(unique_vals, allowed) for allowed in allowed_value_sets):
                print(f"⚠️ Unexpected mask values in {mask_path.name}: {unique_vals}")
                continue

        passed_images += 1

    return total_images, passed_images


In [34]:
# ------------------------------
# Cell 3: Run Checks and Print Summary
# ------------------------------
# Verify every configured split and print a per-split summary.
for split, paths in paths_to_check.items():
    print(f"\n🔍 Checking {split} set...")

    total, passed = verify_resized_images(paths["images"], paths["masks"])

    print(f"📦 Total images found: {total}")
    print(f"✅ Passed checks: {passed}")
    if total:
        print(f"🧮 Coverage: {passed}/{total} ({(passed / total * 100):.2f}%)")
    else:
        # An empty or missing image folder yields total == 0; report it rather
        # than crashing with ZeroDivisionError on the percentage.
        print(f"⚠️ No images found in {paths['images']}; coverage not computed")

print("\n🎉 Done!")



🔍 Checking Train set...
📦 Total images found: 2939
✅ Passed checks: 2939
🧮 Coverage: 2939/2939 (100.00%)

🔍 Checking Val set...
📦 Total images found: 734
✅ Passed checks: 734
🧮 Coverage: 734/734 (100.00%)

🔍 Checking Test set...
📦 Total images found: 3694
✅ Passed checks: 3694
🧮 Coverage: 3694/3694 (100.00%)

🎉 Done!
