# Data preparation: different image shapes
Now we pad every image to a square, resize it to a common shape (512 x 512 by default; the run below uses 128 x 128), and reduce it to a single channel, since all the images are naturally grayscale.

However, some images still contain watermarks and similar artifacts, so this data set is still not very good.

In [1]:
from PIL import Image
from numpy import asarray


In [2]:
import os
from PIL import Image, ImageOps

def process_images(base_dir, final_shape=(512, 512), extensions=('.jpg', '.jpeg', '.png')):
    """Convert every image under ``base_dir`` to a square grayscale image, in place.

    Each matching file is converted to single-channel grayscale (mode ``'L'``),
    padded with black to a square if its width and height differ, resized to
    ``final_shape`` and written back to the same path.

    Args:
        base_dir: Directory that is walked recursively.
        final_shape: ``(width, height)`` passed to ``Image.resize``.
        extensions: File-name suffix (or iterable of suffixes) to process.
            Matching is case-insensitive, so ``photo.JPG`` is handled too.
            The default mirrors the suffixes inspected by
            ``check_image_shapes``.

    Returns:
        None. The files on disk are overwritten.

    Note:
        Padding is added on the right and bottom edges only, so the original
        content stays anchored at the top-left corner of the square.
    """
    # A bare string would otherwise be split into single characters below.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)
    target_size = tuple(final_shape)

    # Iterate through all directories and subdirectories
    for root, _dirs, files in os.walk(base_dir):
        for filename in files:
            if not filename.lower().endswith(suffixes):
                continue
            img_path = os.path.join(root, filename)

            with Image.open(img_path) as source:
                # Already in the target format: rewriting would only re-encode
                # the file (and lose JPEG quality on every run).
                if source.mode == 'L' and source.size == target_size:
                    continue
                # convert() loads the pixel data and returns an independent
                # image, so the file handle can be released before saving.
                img = source.convert('L')  # 'L' mode is for grayscale

            # Pad non-square images with black up to the longer side.
            width, height = img.size
            if width != height:
                new_size = max(width, height)
                # Border order is (left, top, right, bottom).
                border = (0, 0, new_size - width, new_size - height)
                img = ImageOps.expand(img, border, fill='black')

            # Resize the image to final_shape, e.g., (512, 512)
            img = img.resize(target_size, Image.LANCZOS)

            # Overwrite the original file; the source handle is closed by now.
            img.save(img_path)

                    

# Usage: rewrite the data set in place as 128 x 128 grayscale images.
final_shape = (128, 128)
base_directory = 'archive/gray_smaller_size/'
process_images(base_directory, final_shape)


Check that all went well: every image should now have the same shape.

In [3]:
import os
import numpy as np
from PIL import Image

def check_image_shapes(base_dir):
    """Report whether all images under ``base_dir`` share one array shape.

    The directory is walked recursively. Every ``.jpg``/``.jpeg``/``.png``
    file (matched case-insensitively) is opened and converted to a NumPy
    array; the shape of the first image encountered becomes the reference
    that all later images are compared against. Each mismatch is printed,
    followed by a one-line summary.

    Args:
        base_dir: Directory that is walked recursively.

    Returns:
        None. Results are printed to standard output.
    """
    expected_shape = None  # Shape of the first image found; None until then
    all_images_correct_shape = True  # Cleared on the first mismatch

    # Iterate through all directories and subdirectories
    for root, _dirs, files in os.walk(base_dir):
        for filename in files:
            # Add other formats if needed
            if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            img_path = os.path.join(root, filename)

            with Image.open(img_path) as img:
                shape = np.array(img).shape

            if expected_shape is None:
                # The first image sets the reference shape.
                expected_shape = shape
            elif shape != expected_shape:
                print(f"Image {img_path} has shape {shape}, expected {expected_shape}.")
                all_images_correct_shape = False

    if expected_shape is None:
        # Nothing was inspected, so claiming success would be misleading.
        print(f"No images found under {base_dir!r}.")
    elif all_images_correct_shape:
        print("All images have the same shape:", expected_shape)
    else:
        print("Some images do not have the expected shape.")


# Verify the directory that was processed above (base_directory is set in the usage cell).
check_image_shapes(base_directory)


All images have the same shape: (128, 128)
