### Random Image Inspection

In [None]:
from PIL import Image
import numpy as np

# Path of the image to inspect (fill in before running this cell)
img_path = r''
img = Image.open(img_path)

# Open the picture in the system's default viewer
img.show()

# Report the basic properties Pillow exposes for the file
for label, value in (
    ("Format:", img.format),
    ("Mode:", img.mode),         # e.g., 'RGB', 'I;16', 'L', 'CMYK'
    ("Size:", img.size),         # (width, height)
):
    print(label, value)

# Materialise the pixels as a NumPy array
img_np = np.array(img)

# Summarise the array: shape, element type and value range
print("Array shape:", img_np.shape)
print("Data type:", img_np.dtype)
print("Min/Max pixel values:", img_np.min(), img_np.max())

### Count files of each format

In [4]:
import os
from collections import defaultdict

def count_file_types(path):
    """
    Tally the files found under ``path`` (searched recursively) by extension.

    Extensions are lower-cased before counting; a file whose name has no
    extension is counted under the key 'NO_EXTENSION'.

    Args:
        path (str): Root directory to walk.

    Returns:
        dict: Maps each extension (including its leading dot) to the number
            of files that carry it.
    """
    tally = {}

    for _current_dir, _subdirs, filenames in os.walk(path):
        for name in filenames:
            suffix = os.path.splitext(name)[1]
            key = suffix.lower() if suffix else 'NO_EXTENSION'
            tally[key] = tally.get(key, 0) + 1

    return tally

# Example usage: tally the copied dataset and list extensions, most frequent first

path = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy"
file_types = count_file_types(path)

ranked = sorted(file_types.items(), key=lambda pair: pair[1], reverse=True)
for ext, count in ranked:
    print(f"{ext}: {count}")

.jpg: 7385


In [9]:
import os

def find_mat_files(folder_path):
    """
    Print the full path of every ``.mat`` file found beneath ``folder_path``.

    The search is recursive and the extension check ignores case.
    Nothing is returned; the paths are only written to standard output.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for name in filenames:
            if not name.lower().endswith('.mat'):
                continue
            print(os.path.join(current_dir, name))

# Example usage: list any .mat files still present in the original dataset folder
folder = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved"
find_mat_files(folder_path=folder)

C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Test\Abyssinian\Abyssinian_100.mat
C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Train_val\Abyssinian\Abyssinian_101.mat
C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Train_val\Abyssinian\Abyssinian_102.mat


### Copy all files of specific format(s)

In [3]:
import os
import shutil

def copy_files_by_type(src_dir, dst_dir, extensions_to_copy):
    """
    Copy files with specified extensions from src_dir to dst_dir, preserving folder structure.

    Extension matching is case-insensitive on both sides, so ``{'.JPG'}`` and
    ``{'.jpg'}`` select the same files. Each copied file is reported on
    standard output. Destination directories are created only for source
    directories that contain at least one matching file.

    Args:
        src_dir (str): Source directory to search files in.
        dst_dir (str): Destination directory to copy files to.
        extensions_to_copy (set): File extensions to copy, including the leading
            dot, e.g., {'.jpg', '.txt'}. Any iterable of strings is accepted;
            a single string is treated as one extension.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions_to_copy, str):
        extensions_to_copy = [extensions_to_copy]
    # Normalise once so callers do not have to pre-lowercase their extensions.
    wanted = {ext.lower() for ext in extensions_to_copy}

    for root, _dirs, files in os.walk(src_dir):
        matching = [name for name in files if os.path.splitext(name)[1].lower() in wanted]
        if not matching:
            continue  # nothing to copy here, so no mirror directory is created

        # The mirror directory depends only on `root`, so compute and create it once per directory.
        rel_path = os.path.relpath(root, src_dir)
        target_dir = os.path.join(dst_dir, rel_path)
        os.makedirs(target_dir, exist_ok=True)

        for file in matching:
            src_file_path = os.path.join(root, file)
            dst_file_path = os.path.join(target_dir, file)

            # copy2 also attempts to carry over file metadata such as timestamps.
            shutil.copy2(src_file_path, dst_file_path)
            print(f"Copied: {src_file_path} -> {dst_file_path}")

# Example usage: mirror only the listed file types into a sibling folder

src_directory = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved"
dst_directory = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy"
file_types = {".jpg", ".png", ".txt"}  # specify extensions to copy

copy_files_by_type(
    src_dir=src_directory,
    dst_dir=dst_directory,
    extensions_to_copy=file_types,
)

Copied: C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Test\Abyssinian\Abyssinian_105.jpg -> C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Test\Abyssinian\Abyssinian_105.jpg
Copied: C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Test\Abyssinian\Abyssinian_136.jpg -> C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Test\Abyssinian\Abyssinian_136.jpg
Copied: C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Test\Abyssinian\Abyssinian_143.jpg -> C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Test\Abyssinian\Abyssinian_143.jpg
Copied: C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Test\Abyssinian\Abyssinian_149.jpg -> C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Test\Abyssinian\Abyssinian_149.jpg
Copied: C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Test\Abyssinian\Abyssinian_15.jpg -> C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Test\Abyssinian\Abyssinian_15.jpg
Copied: C:\Users\arisi\Downloads\archive\OxfordPetsRemoved\Tes

### Verify File Formats and Corruption

In [None]:
from PIL import Image, UnidentifiedImageError
import os

# Folder whose images should be checked (fill in before running this cell)
folder_path = r''

for filename in os.listdir(folder_path):
    # Only files with a recognised image extension are inspected (non-recursive)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
        filepath = os.path.join(folder_path, filename)
        try:
            with Image.open(filepath) as img:
                # Read the format first: Pillow documents that an image must be
                # reopened before further use once verify() has been called.
                # Named img_format so the builtin format() is not shadowed in
                # the notebook's global namespace.
                img_format = img.format  # Get image format (e.g., JPEG, PNG)
                img.verify()  # Detect corrupted image
                print(f"{filename} is a valid {img_format} file.")
        except UnidentifiedImageError:
            print(f"{filename} is NOT a valid image or is corrupted!")
        except Exception as e:
            # verify() may raise other exception types for damaged files; report and keep going
            print(f"{filename} failed with error: {e}")

In [None]:
# Verifies image corruption
import os
from PIL import Image

# Root of the dataset to scan recursively (fill in before running this cell)
dataset_dir = r""
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        file_path = os.path.join(root, file)
        try:
            # The context manager closes the file handle for every image, even
            # when verify() raises, so a large dataset does not leak open files.
            with Image.open(file_path) as img:
                img.verify()  # Verify the image is valid
        except Exception as e:
            # Every file is tried, so non-image files are reported here as well
            print(f"Corrupted or invalid image: {file_path} ({e})")

### Convert all TIFF files recursively to PNG

In [None]:
import os
from PIL import Image

def convert_tif_to_png_recursive(input_root, output_root):
    """
    Convert every TIFF image under input_root into an RGB PNG under output_root.

    The directory layout below input_root is mirrored below output_root, and
    each output file keeps the input's base name with a ``.png`` extension.
    Files are selected by a case-insensitive ``.tif`` / ``.tiff`` extension.
    A file that cannot be converted is reported on standard output and skipped.

    Args:
        input_root (str): Directory that is searched recursively for TIFF files.
        output_root (str): Directory that receives the PNG files.
    """
    for dirpath, _, filenames in os.walk(input_root):
        for file in filenames:
            if not file.lower().endswith((".tif", ".tiff")):
                continue

            input_path = os.path.join(dirpath, file)

            # Build relative path for output
            rel_dir = os.path.relpath(dirpath, input_root)
            output_dir = os.path.join(output_root, rel_dir)
            os.makedirs(output_dir, exist_ok=True)

            try:
                # The context manager releases the source file handle after each
                # conversion, including when convert() or save() raises.
                with Image.open(input_path) as img:
                    # Convert to RGB if not already
                    rgb_img = img if img.mode == "RGB" else img.convert("RGB")

                    # Save as PNG (the format is chosen from the .png extension)
                    output_filename = os.path.splitext(file)[0] + ".png"
                    output_path = os.path.join(output_dir, output_filename)
                    rgb_img.save(output_path)

                print(f"Converted: {input_path} → {output_path}")
            except Exception as e:
                print(f"Failed to convert {input_path}: {e}")


# Fill in both folders before running; the PNG tree is written under output_folder
input_folder = r""
output_folder = r""
convert_tif_to_png_recursive(input_root=input_folder, output_root=output_folder)

### Check for Image Dimensions

In [None]:
from PIL import Image
import os

# Folder whose images should be measured (fill in before running this cell)
folder_path = ''

# Maps each (width, height) pair to the number of images with that size
dimension_counts = {}

for filename in os.listdir(folder_path):
    # Skip anything that does not carry one of the accepted image extensions
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        continue
    filepath = os.path.join(folder_path, filename)
    with Image.open(filepath) as img:
        dimensions = img.size  # (width, height)
        dimension_counts.setdefault(dimensions, 0)
        dimension_counts[dimensions] += 1

# Print summary, one line per distinct size
for dims in dimension_counts:
    print(f"Dimension {dims}: {dimension_counts[dims]} image(s)")

In [None]:
# Report every image in folder_path whose (width, height) differs from the target
target_size = (224, 224)
for filename in os.listdir(folder_path):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        continue
    filepath = os.path.join(folder_path, filename)
    with Image.open(filepath) as img:
        if img.size == target_size:
            continue
        print(f"{filename} has size {img.size}")

### Color Mode Check

In [7]:
from PIL import Image, UnidentifiedImageError
import os

folder_path = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Train_val\Abyssinian"

# Maps each Pillow mode string to the number of images that use it
mode_counts = {}

for filename in os.listdir(folder_path):
    # Only files with a recognised image extension are opened
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
        continue
    filepath = os.path.join(folder_path, filename)
    try:
        with Image.open(filepath) as img:
            mode = img.mode  # e.g., 'RGB', 'L', 'CMYK', 'RGBA'
            mode_counts.setdefault(mode, 0)
            mode_counts[mode] += 1

            # Anything other than plain RGB is flagged individually
            if mode != 'RGB':
                print(f"⚠️ {filename} is in {mode} mode!")
    except UnidentifiedImageError:
        print(f"❌ {filename} is not a valid image or is corrupted.")

print("\n✅ Color Mode Summary:")
for mode in mode_counts:
    print(f"{mode}: {mode_counts[mode]} image(s)")

⚠️ Abyssinian_5.jpg is in RGBA mode!

✅ Color Mode Summary:
RGB: 179 image(s)
RGBA: 1 image(s)


In [8]:
import os
from PIL import Image

def convert_rgba_to_rgb_recursive(folder_path, output_folder=None):
    """
    Walk folder_path and re-save every image whose mode is 'RGBA' as RGB.

    When output_folder is given, each converted image is written there under
    the same relative sub-directory and file name; otherwise the original
    file is overwritten. Images in any other mode are left alone and are not
    written to output_folder. A file that cannot be opened or saved is
    reported on standard output and skipped.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for name in filenames:
            source = os.path.join(current_dir, name)

            try:
                with Image.open(source) as picture:
                    if picture.mode != 'RGBA':
                        continue

                    print(f"Converting {source} from RGBA to RGB")
                    converted = picture.convert('RGB')

                    if not output_folder:
                        # No output folder: write back over the source file
                        destination = source
                    else:
                        # Mirror the source's sub-directory beneath output_folder
                        mirrored_dir = os.path.join(
                            output_folder, os.path.relpath(current_dir, folder_path)
                        )
                        os.makedirs(mirrored_dir, exist_ok=True)
                        destination = os.path.join(mirrored_dir, name)

                    converted.save(destination)
            except Exception as err:
                print(f"Skipping {source}, error: {err}")

# Example usage: write RGB versions of the RGBA images into a separate folder tree
input_folder = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy"
output_folder = r"C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy2"

convert_rgba_to_rgb_recursive(folder_path=input_folder, output_folder=output_folder)

Converting C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Train_val\Abyssinian\Abyssinian_5.jpg from RGBA to RGB
Converting C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Train_val\Egyptian_Mau\Egyptian_Mau_14.jpg from RGBA to RGB
Converting C:\Users\arisi\Downloads\archive\OxfordPetsRemoved_copy\Train_val\Egyptian_Mau\Egyptian_Mau_186.jpg from RGBA to RGB


### Convert other color modes to RGB

In [None]:
import os
from PIL import Image

# Source folder and destination folder (fill in both before running this cell)
folder_path = ''
output_path = ''

os.makedirs(output_path, exist_ok=True)

for filename in os.listdir(folder_path):
    # Only files with a recognised image extension are processed (non-recursive)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
        filepath = os.path.join(folder_path, filename)
        with Image.open(filepath) as img:
            # Capture the mode before converting; reading it from the converted
            # image would always report 'RGB' in the message below.
            original_mode = img.mode
            if original_mode != 'RGB':
                rgb_img = img.convert('RGB')
                print(f"Converted {filename} from {original_mode} to RGB")
            else:
                rgb_img = img
                print(f"{filename} is already RGB")
            # Every processed image is written to output_path under its original name
            rgb_img.save(os.path.join(output_path, filename))