### Random Image Inspection

In [2]:
from PIL import Image
import numpy as np

# Load the image
img_path = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\American_Bulldog\american_bulldog_2.jpg'
img = Image.open(f"{img_path}")

# Show the image
img.show()

# Print image metadata
print("Format:", img.format)
print("Mode:", img.mode)         # e.g., 'RGB', 'I;16', 'L', 'CMYK'
print("Size:", img.size)         # (width, height)

# Convert to NumPy array
img_np = np.array(img)

# Examine array shape and data type
print("Array shape:", img_np.shape)
print("Data type:", img_np.dtype)
print("Min/Max pixel values:", img_np.min(), img_np.max())

Format: JPEG
Mode: RGB
Size: (500, 333)
Array shape: (333, 500, 3)
Data type: uint8
Min/Max pixel values: 0 255


### Verify File  Formats and Corruption

In [None]:
from PIL import Image, UnidentifiedImageError
import os

# folder_path = r'\Abyssinian'

# for filename in os.listdir(folder_path):
#     if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
#         filepath = os.path.join(folder_path, filename)
#         try:
#             with Image.open(filepath) as img:
#                 img.verify()  # Detect corrupted image
#                 format = img.format  # Get image format (e.g., JPEG, PNG)
#                 print(f"{filename} is a valid {format} file.")
#         except UnidentifiedImageError:
#             print(f"{filename} is NOT a valid image or is corrupted!")
#         except Exception as e:
#             print(f"{filename} failed with error: {e}")

# get subdirs in list, do it for all subdirs
path1 = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test'
path2 = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val'
def get_subdirs(path):
    subdirs = []
    for root, dirs, files in os.walk(path):
        for dir_name in dirs:
            subdirs.append(os.path.join(root, dir_name))
    return subdirs

subdirs1 = get_subdirs(path1)
subdirs2 = get_subdirs(path2)
# join lists
subdirs = subdirs1 + subdirs2

for i in subdirs:
    for filename in os.listdir(i):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp','.mat')):
            filepath = os.path.join(i, filename)
            try:
                with Image.open(filepath) as img:
                    img.verify()  # Detect corrupted image
                    format = img.format  # Get image format (e.g., JPEG, PNG)
                    print(f"{filename} is a valid {format} file.")
            except UnidentifiedImageError:
                print(f"{filename} is NOT a valid image or is corrupted!")
            except Exception as e:
                print(f"{filename} failed with error: {e}")

Abyssinian_105.jpg is a valid JPEG file.
Abyssinian_107.jpg is a valid JPEG file.
Abyssinian_109.jpg is a valid JPEG file.
Abyssinian_114.jpg is a valid JPEG file.
Abyssinian_115.jpg is a valid JPEG file.
Abyssinian_124.jpg is a valid JPEG file.
Abyssinian_126.jpg is a valid JPEG file.
Abyssinian_127.jpg is a valid JPEG file.
Abyssinian_153.jpg is a valid JPEG file.
Abyssinian_172.jpg is a valid JPEG file.
Abyssinian_173.jpg is a valid JPEG file.
Abyssinian_207.jpg is a valid JPEG file.
Abyssinian_225.jpg is a valid JPEG file.
Abyssinian_48.jpg is a valid JPEG file.
Abyssinian_66.jpg is a valid JPEG file.
Abyssinian_69.jpg is a valid JPEG file.
Abyssinian_92.jpg is a valid JPEG file.
Abyssinian_96.jpg is a valid JPEG file.
Abyssinian_98.jpg is a valid JPEG file.
american_bulldog_110.jpg is a valid JPEG file.
american_bulldog_127.jpg is a valid JPEG file.
american_bulldog_130.jpg is a valid JPEG file.
american_bulldog_134.jpg is a valid JPEG file.
american_bulldog_146.jpg is a valid JPE

In [None]:
from PIL import Image, UnidentifiedImageError
import os

# get subdirs in list, do it for all subdirs
path1 = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test'
path2 = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val'
def get_subdirs(path):
    subdirs = []
    for root, dirs, files in os.walk(path):
        for dir_name in dirs:
            subdirs.append(os.path.join(root, dir_name))
    return subdirs

subdirs1 = get_subdirs(path1)
subdirs2 = get_subdirs(path2)
# join lists
subdirs = subdirs1 + subdirs2

deleted_images = []

for subdir in subdirs:
    for filename in os.listdir(subdir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp', '.mat')):
            filepath = os.path.join(subdir, filename)
            try:
                # Check structure
                with Image.open(filepath) as img:
                    img.verify()
                # Check actual image data
                with Image.open(filepath) as img:
                    img.load()
            except (UnidentifiedImageError, Exception):
                print(f"Deleting corrupt image: {filepath}")
                deleted_images.append(filepath)
                try:
                    os.remove(filepath)
                except Exception as delete_error:
                    print(f"Failed to delete {filepath}: {delete_error}")

# Write deleted file paths to a text file
with open("deleted_corrupt_images.txt", "w") as f:
    for path in deleted_images:
        f.write(f"{path}\n")

print(f"\nDeleted {len(deleted_images)} corrupt image(s). List saved to 'deleted_corrupt_images.txt'")


Deleted 0 corrupt image(s). List saved to 'deleted_corrupt_images.txt'


In [25]:
import scipy.io
from PIL import Image
import numpy as np

# Load the .mat file
path = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Abyssinian\Abyssinian_100.mat'
mat = scipy.io.loadmat(path)

# Check keys in the mat dictionary to find your image data variable
print(mat.keys())

# Suppose your image array is under the key 'image_data'
image_array = mat['frames']

# If needed, convert the array to uint8 and ensure shape fits an image (e.g., 2D grayscale or 3D RGB)
# For example, normalize if float:
if image_array.dtype != np.uint8:
    image_array = image_array[:, :, :3] 
    image_array = (255 * (image_array - image_array.min()) / np.ptp(image_array)).astype(np.uint8)

# Convert to PIL Image and save
img = Image.fromarray(image_array)
img.save('output_image.jpeg')

dict_keys(['__header__', '__version__', '__globals__', 'frames', 'binsa'])


IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

In [22]:
print(type(mat['frames']), mat['frames'].shape)
print(type(mat['binsa']), mat['binsa'].shape)

<class 'numpy.ndarray'> (4, 175288)
<class 'numpy.ndarray'> (1, 175288)


In [None]:
print(mat['binsa'])

In [31]:
print(mat['__header__'])
print(mat['__version__'])
print(type(mat['frames']), mat['frames'].shape)
print(type(mat['binsa']), mat['binsa'].shape)

b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Wed Apr 20 19:01:29 2011'
1.0
<class 'numpy.ndarray'> (4, 175288)
<class 'numpy.ndarray'> (1, 175288)


In [18]:
# get subdirs in list, do it for all subdirs
path1 = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test'
path2 = r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val'
def get_subdirs(path):
    subdirs = []
    for root, dirs, files in os.walk(path):
        for dir_name in dirs:
            subdirs.append(os.path.join(root, dir_name))
    return subdirs

subdirs1 = get_subdirs(path1)
subdirs2 = get_subdirs(path2)
# join lists
subdirs = subdirs1 + subdirs2

corrupt_images = []

for subdir in subdirs:
    for filename in os.listdir(subdir):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            filepath = os.path.join(subdir, filename)
            try:
                img = cv2.imread(filepath)
                if img is None:
                    raise ValueError("cv2 could not decode image.")
            except Exception as e:
                print(f"Deleting corrupt image: {filepath} ({e})")
                corrupt_images.append(filepath)
                try:
                    os.remove(filepath)
                except Exception as delete_error:
                    print(f"Failed to delete {filepath}: {delete_error}")

# Write paths to a file
with open("deleted_corrupt_images.txt", "w") as f:
    for path in corrupt_images:
        f.write(f"{path}\n")

Deleting corrupt image: C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\Abyssinian\Abyssinian_105.jpg (name 'cv2' is not defined)
Deleting corrupt image: C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\Abyssinian\Abyssinian_107.jpg (name 'cv2' is not defined)
Deleting corrupt image: C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\Abyssinian\Abyssinian_109.jpg (name 'cv2' is not defined)
Deleting corrupt image: C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\Abyssinian\Abyssinian_114.jpg (name 'cv2' is not defined)
Deleting corrupt image: C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\Abyssinian\Abyssinian_115.jpg (name 'cv2' is not defined)
Deleting corrupt image: C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Test\Abyssinian\Abyssinia

In [None]:
# Verifies image corruption
import os
from PIL import Image

dataset_dir = r""
for root, dirs, files in os.walk(dataset_dir):
    for file in files:
        file_path = os.path.join(root, file)
        try:
            img = Image.open(file_path)
            img.verify()  # Verify the image is valid
        except Exception as e:
            print(f"Corrupted or invalid image: {file_path} ({e})")

### Convert all files recursively to PNG

In [None]:
import os
from PIL import Image

def convert_tif_to_png_recursive(input_root, output_root):
    for dirpath, _, filenames in os.walk(input_root):
        for file in filenames:
            if file.lower().endswith((".tif", ".tiff")):
                input_path = os.path.join(dirpath, file)
                
                # Build relative path for output
                rel_dir = os.path.relpath(dirpath, input_root)
                output_dir = os.path.join(output_root, rel_dir)
                os.makedirs(output_dir, exist_ok=True)
                
                # Load and convert image
                try:
                    img = Image.open(input_path)
                    
                    # Convert to RGB if not already
                    if img.mode != "RGB":
                        img = img.convert("RGB")
                    
                    # Save as PNG
                    output_filename = os.path.splitext(file)[0] + ".png"
                    output_path = os.path.join(output_dir, output_filename)
                    img.save(output_path)
                    
                    print(f"Converted: {input_path} → {output_path}")
                except Exception as e:
                    print(f"Failed to convert {input_path}: {e}")


input_folder = r""
output_folder = r""
convert_tif_to_png_recursive(input_folder, output_folder)

### Convert Specific Files to JPEG

In [5]:
# path =r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Abyssinian\Abyssinian_34.jpg'
# path =r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Egyptian_Mau\Egyptian_Mau_139.jpg'
# path =r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Egyptian_Mau\Egyptian_Mau_145.jpg'
# path =r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Egyptian_Mau\Egyptian_Mau_167.jpg'
# path =r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Egyptian_Mau\Egyptian_Mau_177.jpg'
# beagle_116
path =r'C:\Programming_Files\JupyterVSCode\Multiclass_Transfer_Learning\OxfordPets_37\Dataset\Train_val\Beagle\beagle_116.jpg'
# chihuahua_121

from PIL import Image

# Open the GIF file
gif_image = Image.open(path)

# Convert to RGB (JPEG doesn't support transparency)
jpeg_image = gif_image.convert('RGB')

# Save as JPEG
jpeg_image.save('output.jpeg', 'JPEG')

### Check for Image Dimensions

In [None]:
from PIL import Image
import os

# Path to your folder
folder_path = ''

# Dictionary to track dimensions
dimension_counts = {}

for filename in os.listdir(folder_path):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        filepath = os.path.join(folder_path, filename)
        with Image.open(filepath) as img:
            dimensions = img.size  # (width, height)
            dimension_counts[dimensions] = dimension_counts.get(dimensions, 0) + 1

# Print summary
for dims, count in dimension_counts.items():
    print(f"Dimension {dims}: {count} image(s)")

In [None]:
target_size = (224, 224)
for filename in os.listdir(folder_path):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        filepath = os.path.join(folder_path, filename)
        with Image.open(filepath) as img:
            if img.size != target_size:
                print(f"{filename} has size {img.size}")

### Color Mode Check

In [None]:
from PIL import Image, UnidentifiedImageError
import os

folder_path = ''

# Count modes
mode_counts = {}

for filename in os.listdir(folder_path):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
        filepath = os.path.join(folder_path, filename)
        try:
            with Image.open(filepath) as img:
                mode = img.mode  # e.g., 'RGB', 'L', 'CMYK', 'RGBA'
                mode_counts[mode] = mode_counts.get(mode, 0) + 1

                if mode != 'RGB':
                    print(f"⚠️ {filename} is in {mode} mode!")
        except UnidentifiedImageError:
            print(f"❌ {filename} is not a valid image or is corrupted.")

print("\n✅ Color Mode Summary:")
for mode, count in mode_counts.items():
    print(f"{mode}: {count} image(s)")

### Convert other formats to RGB

In [None]:
import os
from PIL import Image

folder_path = ''
output_path = ''

os.makedirs(output_path, exist_ok=True)

for filename in os.listdir(folder_path):
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.webp')):
        filepath = os.path.join(folder_path, filename)
        with Image.open(filepath) as img:
            if img.mode != 'RGB':
                img = img.convert('RGB')
                print(f"Converted {filename} from {img.mode} to RGB")
            else:
                print(f"{filename} is already RGB")
            img.save(os.path.join(output_path, filename))