# 0.1 Data Processing

### Library imports

In [None]:
import random
import os
import glob
import numpy as np
import torch
from torchvision import transforms
from torchvision.transforms import Compose
from PIL import Image
from matplotlib import pyplot as plt

In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device  # last expression of the cell, so the notebook displays the chosen device

### Path organization

In [None]:
# Dataset locations, all derived from the directory the notebook is run from.
pwd_notebook = os.path.abspath('')  # absolute path of the current working directory
# The project root is taken to be two levels above that directory.
root_path = os.path.dirname(os.path.dirname(pwd_notebook))
data_dir = os.path.join(root_path, 'data')  # folder holding every split

# One sub-folder per split: train / test / valid.
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')
valid_dir = os.path.join(data_dir, 'valid')

# Echo the resolved split folders, one per line, so they can be checked by eye.
print(train_dir, test_dir, valid_dir, sep="\n")

### Transformation

In [None]:
# Transform pipeline applied to each image.
# NOTE: resizing to 64x64 (transforms.Resize(size=(64, 64))) is currently disabled,
# so images keep their original resolution.
data_transform = transforms.Compose(
    [
        # Mirror the image left/right; p is the flip probability (0.5 = 50% chance)
        transforms.RandomHorizontalFlip(p=0.5),
        # Convert the image to a torch.Tensor (8-bit pixel values 0-255 are rescaled to 0.0-1.0)
        transforms.ToTensor(),
    ]
)

### Preprocessing

In [None]:
def plot_transformed_images(image_paths, transform, n=3, seed=None):
    """Plot randomly chosen images next to their transformed versions.

    Picks up to ``n`` paths from ``image_paths`` and, for each one, shows a
    two-panel figure: the original image on the left and the output of
    ``transform`` on the right. The figure title is the image class, taken
    from the name of the directory that contains the file.

    Args:
        image_paths (list): Paths of the candidate images.
        transform (callable): Transform applied to the opened PIL image. Its
            result is permuted from [C, H, W] to [H, W, C] before plotting.
        n (int, optional): Number of images to plot. Capped at
            ``len(image_paths)``. Defaults to 3.
        seed (int, optional): Seed for the image selection. When given, the
            same images are selected on every call. Defaults to None, in
            which case the selection is not seeded and varies between calls.

    Returns:
        None. The figures are shown with ``plt.show()``.
    """
    # random.sample raises ValueError when k exceeds the population size,
    # so never request more images than are available.
    sample_size = min(n, len(image_paths))
    # A private generator honours the seed without reseeding the global
    # `random` state that other cells rely on.
    rng = random.Random(seed) if seed is not None else random

    for image_path in rng.sample(image_paths, k=sample_size):
        with Image.open(image_path) as f:

            fig, ax = plt.subplots(1, 2)

            if f.mode == "I":
                # Integer-mode images are min-max rescaled to 0-255 for display.
                # Work in float64 so the subtraction cannot overflow the integer dtype.
                pixels = np.asarray(f, dtype=np.float64)
                lowest = pixels.min()
                value_range = pixels.max() - lowest
                if value_range > 0:
                    ax[0].imshow((pixels - lowest) / value_range * 255)
                else:
                    # Constant image: skip the 0/0 division and show a flat picture.
                    ax[0].imshow(np.zeros_like(pixels))
            else:
                ax[0].imshow(f)

            ax[0].set_title(f"Original \nSize: {f.size} \nMode:{f.mode}")
            ax[0].axis("off")

            # Transform and plot image
            # Note: permute() will change shape of image to suit matplotlib
            # (PyTorch default is [C, H, W] but Matplotlib is [H, W, C])
            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"Transformed \nSize: {transformed_image.shape} \nMode:{f.mode}")
            ax[1].axis("off")

            # The class label is the name of the folder containing the image.
            image_class = os.path.basename(os.path.dirname(image_path))

            fig.suptitle(f"Class: {image_class}", fontsize=16)
            plt.show()

In [None]:
# Inspect one randomly chosen image from the dataset (the choice is not seeded).
# 1. Get all image paths ('**' with recursive=True matches any depth of sub-folders)
image_path_list = glob.glob(os.path.join(data_dir, '**', '*.png'), recursive=True)
# random.choice raises a bare IndexError on an empty list; report the searched folder instead
if not image_path_list:
    raise FileNotFoundError(f"No .png images found under {data_dir}")
# 2. Get random image path
random_image_path = random.choice(image_path_list)
# 3. Get image class from path name (the image class is the name of the directory where the image is stored)
image_class = os.path.basename(os.path.dirname(random_image_path))
# 4. Open image
img = Image.open(random_image_path)
# 5. Print metadata
print(f"Random image path: {random_image_path}")
print(f"Image class: {image_class}")
print(f"Image height: {img.height}") 
print(f"Image width: {img.width}")
print(f"Image mode: {img.mode}")
print(f"Dataset Size: {len(image_path_list)}")
print(image_path_list)
img  # last expression of the cell, so the notebook renders the image

In [None]:
# Preview random images side by side with their transformed versions.
# Change n to the number of images you want to see.
plot_transformed_images(image_path_list, transform=data_transform, n=3)