# In [None]:
import os
import time
from pathlib import Path

import cv2
import numpy as np


# Change this to your main images directory.
# NOTE: a Windows drive letter must be followed by a colon ("C:\..."); without
# it the value is a path relative to the current working directory.
ROOT_DIR = r"C:\BPA\data"


# File suffixes (lowercase, including the leading dot) that count as images.
IMAGE_EXTS = {
    ".bmp",
    ".jpeg",
    ".jpg",
    ".png",
    ".tif",
    ".tiff",
}


def is_image_file(path: Path) -> bool:
    """Return True if *path* has one of the suffixes listed in IMAGE_EXTS.

    The comparison is case-insensitive: the suffix is lower-cased before the
    lookup. Only the name is inspected; the filesystem is not touched.
    """
    extension = path.suffix.lower()
    return extension in IMAGE_EXTS


def scan_images(
    root_dir: str | Path,
    *,
    huge_file_mb: float = 64,
    verbose: bool = True,
) -> None:
    """Recursively scan *root_dir* for images and print a load report.

    Every file whose suffix is accepted by ``is_image_file`` is loaded with
    ``cv2.imread(..., cv2.IMREAD_UNCHANGED)``. The report printed to stdout
    contains the number of files found / read / unreadable, the average pixel
    count and load time of the readable images, and the lists of oversized
    and unreadable files.

    Args:
        root_dir: Directory to scan (searched recursively).
        huge_file_mb: On-disk size in MiB above which a successfully read
            image is listed as "huge". Defaults to 64.
        verbose: When True (default), print one progress line per file.
            The final summary is always printed.

    Returns:
        None. All results are written to stdout.
    """
    root = Path(root_dir)

    print(f"Scanning directory: {root.resolve()}\n")

    # rglob() yields nothing for a missing root, which would otherwise be
    # indistinguishable from "directory exists but contains no images".
    if not root.is_dir():
        print(f"ERROR: not a directory: {root}")
        return

    # The suffix test is a cheap string check, so do it before is_file()
    # (which hits the filesystem). is_file() drops directories that merely
    # look like images by name; sorted() makes the report order reproducible.
    all_images = sorted(
        p for p in root.rglob("*") if is_image_file(p) and p.is_file()
    )
    total = len(all_images)
    print(f"Found {total} image files.\n")

    if total == 0:
        return

    unreadable = []
    huge_files = []
    read_count = 0
    total_pixels = 0
    total_load_time = 0.0

    for idx, img_path in enumerate(all_images, start=1):
        try:
            file_size_mb = img_path.stat().st_size / (1024 * 1024)

            # perf_counter() is monotonic and high-resolution, unlike
            # time.time(), which can jump when the system clock is adjusted.
            t0 = time.perf_counter()
            img = cv2.imread(str(img_path), cv2.IMREAD_UNCHANGED)
            load_time = time.perf_counter() - t0
        except (OSError, cv2.error) as exc:
            # stat() can fail (permissions, file removed mid-scan) and
            # imread() can raise instead of returning None; one bad file
            # must not abort the whole scan.
            if verbose:
                print(f"[{idx}/{total}] UNREADABLE: {img_path} ({exc})")
            unreadable.append(img_path)
            continue

        if img is None:
            if verbose:
                print(f"[{idx}/{total}] UNREADABLE: {img_path}")
            unreadable.append(img_path)
            continue

        # A 2-D array has no channel axis; report it as one channel.
        h, w = img.shape[:2]
        channels = 1 if img.ndim == 2 else img.shape[2]

        read_count += 1
        total_pixels += h * w
        total_load_time += load_time

        if file_size_mb > huge_file_mb:
            huge_files.append(img_path)

        if verbose:
            print(
                f"[{idx}/{total}] OK   : {img_path.name}  | "
                f"Size: {file_size_mb:.1f} MB  | "
                f"Shape: {h}x{w}x{channels}  | "
                f"Load time: {load_time:.3f} s"
            )

    print(f"Total image files found   : {total}")
    print(f"Successfully read         : {read_count}")
    print(f"Unreadable / corrupted    : {len(unreadable)}")

    if read_count > 0:
        avg_pixels = total_pixels / read_count
        avg_load_time = total_load_time / read_count
        print(f"Average pixels per image  : {avg_pixels:,.0f}")
        print(f"Average load time (seconds): {avg_load_time:.3f}")

    if huge_files:
        print(f"\nImages larger than {huge_file_mb:g} MB:")
        for p in huge_files:
            print("  -", p)

    if unreadable:
        print("\nUnreadable / failed images:")
        for p in unreadable:
            print("  -", p)


# Script entry point: scan the configured directory when run directly
# (not when this module is imported).
if __name__ == "__main__":
    scan_images(root_dir=ROOT_DIR)
