# 1. Dataset preprocessing and augmentation 

## 1.1 Dataset preprocessing (convert RGBA to RGB and resize)

### 1.1.1 Convert RGBA to RGB

We noticed that some images in the dataset are stored in RGBA mode. We need to convert them to RGB so that every image has the same three channels and can be processed in the same way.

In [6]:
import os
from PIL import Image

def check_image_mode(input_folder):
    """
    Check all images in a folder and identify those with RGBA mode.

    Prints one line for every RGBA image found and for every file that
    could not be opened, followed by a short summary.

    Args:
        input_folder: Path to the folder containing images

    Returns:
        List of filenames (not full paths) of the images whose mode is 'RGBA'
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    rgba_images = []
    total_images = 0

    for filename in os.listdir(input_folder):
        if not filename.lower().endswith(image_extensions):
            continue

        total_images += 1
        input_path = os.path.join(input_folder, filename)
        try:
            with Image.open(input_path) as img:
                if img.mode == 'RGBA':
                    rgba_images.append(filename)
                    print(f"RGBA image found: {filename}")
        except Exception as e:
            # Best-effort scan: report the unreadable file and keep going
            print(f"Error processing {filename}: {str(e)}")

    # A folder without any image files would otherwise divide by zero
    if total_images:
        rgba_percentage = (len(rgba_images) / total_images) * 100
    else:
        rgba_percentage = 0.0

    # Print summary
    print("\nSummary:")
    print(f"Total images processed: {total_images}")
    print(f"Number of RGBA images: {len(rgba_images)}")
    print(f"Percentage of RGBA images: {rgba_percentage:.2f}%")

    return rgba_images

# Scan the same colour-image folder that the resize step reads from
input_folder = "../Dataset/TrainVal/color"
rgba_files = check_image_mode(input_folder=input_folder)

RGBA image found: Egyptian_Mau_186.jpg
RGBA image found: Egyptian_Mau_14.jpg

Summary:
Total images processed: 3680
Number of RGBA images: 2
Percentage of RGBA images: 0.05%


### 1.1.2 Resize images and add padding (in order to make them square)

In [14]:
import os
from PIL import Image

def resize_and_pad_image(image_path, target_size, output_path, colouring_method):
    """
    Resize an image to fit inside target_size while maintaining its aspect
    ratio, centre it on a black canvas of exactly target_size, and save it.

    Args:
        image_path: Path to the input image
        target_size: Tuple of (width, height) for the desired output size
        output_path: Path to save the resized and padded image
        colouring_method: Resampling filter passed to Image.resize
            (e.g. Image.Resampling.LANCZOS or Image.Resampling.NEAREST)
    """
    target_width, target_height = target_size[0], target_size[1]

    with Image.open(image_path) as img:
        # Convert RGBA to RGB if necessary
        if img.mode == 'RGBA':
            img = img.convert('RGB')

        src_width, src_height = img.size

        # Scale factor needed along each axis to reach the target
        width_ratio = target_width / src_width
        height_ratio = target_height / src_height

        # The axis with the smaller ratio is the limiting one. Pin it to the
        # target exactly: ratio * size can land just below the target because
        # of float rounding, and int() would then drop a pixel. The other
        # axis is truncated as before, but clamped to at least 1 pixel so a
        # very elongated image never produces a zero-sized resize.
        if width_ratio < height_ratio:
            new_size = (target_width, max(1, int(src_height * width_ratio)))
        elif height_ratio < width_ratio:
            new_size = (max(1, int(src_width * height_ratio)), target_height)
        else:
            new_size = (target_width, target_height)

        # Resize the image
        resized_img = img.resize(new_size, colouring_method)

        # Create new black image with target size
        padded_img = Image.new('RGB', target_size, (0, 0, 0))

        # Calculate position to paste resized image (center it)
        paste_position = (
            (target_width - new_size[0]) // 2,
            (target_height - new_size[1]) // 2
        )

        # Paste resized image onto black background
        padded_img.paste(resized_img, paste_position)

        # Save the final image
        padded_img.save(output_path, quality=95)

# Settings for the colour images: every output is a 300x300 square
target_size = (300, 300)
input_folder = "../Dataset/TrainVal/color"
output_folder = "../Dataset_resized"
colouring_method = Image.Resampling.LANCZOS

# Make sure the destination exists before writing into it
os.makedirs(output_folder, exist_ok=True)

# Resize and pad every image file found in the input folder
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        continue
    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)
    resize_and_pad_image(input_path, target_size, output_path, colouring_method)

This code resizes every image in the input folder so that its longer side is 300 pixels and its shorter side is z pixels, where z is at most 300, while maintaining the aspect ratio. It then pads the shorter side with black pixels to make the image a 300x300 square. The output images are saved in the output folder. The masks still need the same treatment, so we process them next, using nearest-neighbour resampling instead of LANCZOS.

In [15]:
# Apply the same resize-and-pad step to the label masks
input_folder = "../Dataset/TrainVal/label"
output_folder = "../Dataset_resized_masks"
os.makedirs(output_folder, exist_ok=True)

# Masks are resized with nearest-neighbour resampling
colouring_method = Image.Resampling.NEAREST

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        continue
    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)
    resize_and_pad_image(input_path, target_size, output_path, colouring_method)