In [None]:
import os
import time
from pathlib import Path

import cv2
import numpy as np


ROOT_DIR = r"C:\BPA\data"  # directory scanned when this file is run as a script
# Suffixes (lower-case, with the leading dot) that is_image_file() accepts;
# it lower-cases a path's suffix before testing membership in this set.
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}


def is_image_file(path: Path) -> bool:
    """Report whether *path* carries one of the suffixes in ``IMAGE_EXTS``.

    The suffix is lower-cased before the lookup, so the match is
    case-insensitive. Only the name is inspected; the file system is not
    consulted.
    """
    extension = path.suffix.lower()
    return extension in IMAGE_EXTS


def scan_images(root_dir: str | Path):
    root = Path(root_dir)

    print(f"Scanning directory: {root.resolve()}\n")

    all_images = [p for p in root.rglob("*") if is_image_file(p)]
    total = len(all_images)
    print(f"Found {total} image files.\n")

    if total == 0:
        return

    unreadable = []
    huge_files = []
    stats = {
        "total_read": 0,
        "total_pixels": 0,
        "total_load_time": 0.0,
    }

    for idx, img_path in enumerate(all_images, start=1):
        file_size_mb = img_path.stat().st_size / (1024 * 1024)

        t0 = time.time()
        img = cv2.imread(str(img_path), cv2.IMREAD_UNCHANGED)
        load_time = time.time() - t0

        if img is None:
            print(f"[{idx}/{total}] UNREADABLE: {img_path}")
            unreadable.append(img_path)
            continue

        # Basic info
        if img.ndim == 2:
            h, w = img.shape
            channels = 1
        else:
            h, w, channels = img.shape

        num_pixels = h * w

        stats["total_read"] += 1
        stats["total_pixels"] += num_pixels
        stats["total_load_time"] += load_time


        if file_size_mb > 64:
            huge_files.append(img_path)

        print(
            f"[{idx}/{total}] OK   : {img_path.name}  | "
            f"Size: {file_size_mb:.1f} MB  | "
            f"Shape: {h}x{w}x{channels}  | "
            f"Load time: {load_time:.3f} s"
        )

    print("\n==== SUMMARY ====")
    print(f"Total image files found   : {total}")
    print(f"Successfully read         : {stats['total_read']}")
    print(f"Unreadable / corrupted    : {len(unreadable)}")

    if stats["total_read"] > 0:
        avg_pixels = stats["total_pixels"] / stats["total_read"]
        avg_load_time = stats["total_load_time"] / stats["total_read"]
        print(f"Average pixels per image  : {avg_pixels:,.0f}")
        print(f"Average load time (seconds): {avg_load_time:.3f}")

    if huge_files:
        print("\nImages larger than 64 MB:")
        for p in huge_files:
            print("  -", p)

    if unreadable:
        print("\nUnreadable / failed images:")
        for p in unreadable:
            print("  -", p)


# Scan ROOT_DIR only when this file is executed directly, not when imported.
if __name__ == "__main__":
    scan_images(ROOT_DIR)


Scanning directory: C:\BPA\data

Found 1111 image files.

[1/1111] OK   : GS - 100.jpg  | Size: 71.6 MB  | Shape: 7270x10000x3  | Load time: 0.887 s
[2/1111] OK   : GS - 101.jpg  | Size: 2.8 MB  | Shape: 7270x10000x3  | Load time: 0.361 s
[3/1111] OK   : GS - 102.jpg  | Size: 8.0 MB  | Shape: 7270x10000x3  | Load time: 0.440 s
[4/1111] OK   : GS - 103.jpg  | Size: 6.8 MB  | Shape: 7305x10000x3  | Load time: 0.417 s
[5/1111] OK   : GS - 104.jpg  | Size: 6.9 MB  | Shape: 7305x10000x3  | Load time: 0.442 s
[6/1111] OK   : GS - 105.jpg  | Size: 2.3 MB  | Shape: 7305x10000x3  | Load time: 0.361 s
[7/1111] OK   : GS - 106.jpg  | Size: 6.9 MB  | Shape: 7305x10000x3  | Load time: 0.427 s
[8/1111] OK   : GS - 107.jpg  | Size: 6.9 MB  | Shape: 7305x10000x3  | Load time: 0.435 s
[9/1111] OK   : GS - 108.jpg  | Size: 4.7 MB  | Shape: 7312x10000x3  | Load time: 0.374 s
[10/1111] OK   : GS - 109.jpg  | Size: 4.7 MB  | Shape: 7312x10000x3  | Load time: 0.384 s
[11/1111] OK   : GS - 110.jpg  | Size: 4

error: OpenCV(4.12.0) C:\miniconda3\conda-bld\opencv-suite_1761815220365\work\modules\imgcodecs\src\loadsave.cpp:79: error: (-215:Assertion failed) pixels <= CV_IO_MAX_IMAGE_PIXELS in function 'cv::validateInputImageSize'
