In [2]:
import os
from PIL import Image
import torch
from torchvision import transforms
from tqdm import tqdm

def check_image(file_path):
    """Check that an image file opens, decodes and converts to a tensor.

    The file is opened twice on purpose: once for PIL's ``verify()``
    integrity check, and once more to decode the pixels, because an image
    object must be reopened after ``verify()`` before it can be loaded.

    Args:
        file_path: Path of the image file to check.

    Returns:
        A ``(is_valid, message)`` tuple. On success this is
        ``(True, "Image is valid")``. On failure ``is_valid`` is ``False``
        and ``message`` is either the NaN/Inf notice or the text of the
        exception that was raised.
    """
    try:
        # Pass 1: structural integrity check without decoding pixel data.
        with Image.open(file_path) as img:
            img.verify()

        # Pass 2: full decode. The context manager guarantees the file handle
        # is released even for formats that keep it open after loading.
        with Image.open(file_path) as img:
            tensor = transforms.ToTensor()(img.convert('RGB'))

        # Check for NaN or Inf values
        if torch.isnan(tensor).any() or torch.isinf(tensor).any():
            return False, "Image contains NaN or Inf values"

        return True, "Image is valid"
    except Exception as e:
        # Any failure (missing file, truncated data, unsupported format, ...)
        # is reported to the caller as a message instead of being raised.
        return False, str(e)

def scan_directory(directory, extensions=('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
    """Walk a directory tree and collect the image files that fail validation.

    Args:
        directory: Root directory to walk recursively.
        extensions: File-name suffixes (matched case-insensitively) that mark
            a file as an image. A single string is accepted as well.

    Returns:
        A list of ``(file_path, message)`` tuples, one for every image that
        ``check_image`` reported as invalid. The list is empty when all
        images pass or no matching file is found.
    """
    if isinstance(extensions, str):
        # Avoid iterating a lone string character by character.
        extensions = (extensions,)
    # Normalised once, outside the loops; str.endswith requires a tuple.
    suffixes = tuple(ext.lower() for ext in extensions)

    corrupted_images = []
    for root, _, files in os.walk(directory):
        # Filter first so the progress bar counts images, not every file.
        image_files = [name for name in files if name.lower().endswith(suffixes)]
        for name in tqdm(image_files, desc="Checking images"):
            file_path = os.path.join(root, name)
            is_valid, message = check_image(file_path)
            if not is_valid:
                corrupted_images.append((file_path, message))
    return corrupted_images

# Usage: scan the validation image folder and report every file that failed.
image_directory = "/root/227_yolo_training_Dataset/val/images"
corrupted = scan_directory(image_directory)

if not corrupted:
    print("No corrupted images found.")
else:
    print("Corrupted images found:")
    # Each entry pairs the offending path with the reason it was rejected.
    for img, msg in corrupted:
        print(f"{img}: {msg}")

Checking images: 100%|██████████| 124/124 [00:37<00:00,  3.28it/s]

No corrupted images found.



