# In [None]:
import os
import tensorflow as tf

def check_jpeg_validity(image_paths):
    """Return the paths that cannot be read and decoded as JPEG.

    Each path is read with ``tf.io.read_file`` and decoded with
    ``tf.image.decode_jpeg`` (requesting 3 channels). Any exception raised
    along the way marks the path as invalid; the failure is printed and the
    path is collected.

    Args:
        image_paths: Iterable of file paths to test.

    Returns:
        A list of the failing paths, in the order they were encountered.
    """

    def _decode_failure(path):
        # Hand back the exception instead of raising so the caller can
        # report it; None signals that the file decoded without error.
        try:
            tf.image.decode_jpeg(tf.io.read_file(path), channels=3)
        except Exception as exc:
            return exc
        return None

    rejected = []
    for candidate in image_paths:
        failure = _decode_failure(candidate)
        if failure is None:
            continue
        print(f"Invalid image: {candidate}. Error: {failure}")
        rejected.append(candidate)

    return rejected

def scan_folders_for_invalid_images(root_folder):
    """Walk ``root_folder`` and collect invalid JPEG files per directory.

    Only files whose names end in ``.jpg`` or ``.jpeg`` (case-insensitive)
    are checked. Every directory holding at least one such file is announced
    on stdout and its files are passed to ``check_jpeg_validity``.

    Args:
        root_folder: Directory at which the recursive walk starts.

    Returns:
        A dict mapping each directory path to the list of its files reported
        invalid. Directories without invalid files are left out.
    """
    jpeg_suffixes = ('.jpg', '.jpeg')
    report = {}

    for folder, _subfolders, names in os.walk(root_folder):
        # Keep only the JPEG-named entries, as full paths.
        candidates = []
        for name in names:
            if name.lower().endswith(jpeg_suffixes):
                candidates.append(os.path.join(folder, name))

        # Nothing to check here; move on without announcing the folder.
        if not candidates:
            continue

        print(f"Scanning folder: {folder}")
        broken = check_jpeg_validity(candidates)

        # Record the folder only when it actually has failures.
        if broken:
            report[folder] = broken

    return report

# Dataset location to audit; every subfolder beneath it is searched.
root_folder = '/Users/elizabethcoquillette/Documents/DS5220/Project/small_animals'

# Maps each folder that holds undecodable JPEGs to the offending file paths.
invalid_files = scan_folders_for_invalid_images(root_folder)

# Report: a single all-clear line, or a per-folder listing of the bad files.
if not invalid_files:
    print("No invalid JPEG images found.")
else:
    print("\nFolders with invalid JPEGs:")
    for folder, invalid_images in invalid_files.items():
        print(f"Folder: {folder}")
        for img in invalid_images:
            print(f"  - {img}")