In [67]:
import os
import os.path

import torch

from PIL import Image
import torchvision.transforms as transforms

from torch.utils.data import Dataset, DataLoader, random_split

In [68]:
# Pick the compute backend: CUDA if present, otherwise Apple's MPS backend,
# otherwise plain CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# To force CPU execution, uncomment the next line. The author notes that CPU is
# currently faster than CUDA here, possibly because the network is very small.
# device = "cpu"
print(f"Using {device} device")

Using cuda device


# Creating a custom Dataset Class

In [69]:
class SimpsonsImageDataset(Dataset):
    """Map-style dataset that pairs pre-loaded images with their labels.

    Args:
        tensor: Indexable collection of images; ``tensor[i]`` is sample ``i``.
        label: Indexable collection of labels aligned with ``tensor``, i.e.
            ``label[i]`` belongs to ``tensor[i]``.

    Raises:
        ValueError: If ``tensor`` and ``label`` do not have the same length.
    """

    def __init__(self, tensor, label):
        # Fail fast: without this check a mismatch only shows up later, as an
        # IndexError (or as silently unused labels) while iterating the data.
        if len(tensor) != len(label):
            raise ValueError(
                "tensor and label must have the same length, "
                f"got {len(tensor)} and {len(label)}"
            )
        self.tensor = tensor
        self.label = label

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.tensor)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.tensor[index], self.label[index]

# Loading an image and creating a label

In [70]:
# Known character classes. A name's position in this list is its integer label.
all_labels = [
    "abraham_grampa_simpson",
    "agnes_skinner",
]

In [71]:
# Functions for image handling

def show_image_by_path(_image_path: str) -> None:
    """Open the image file at ``_image_path`` and display it.

    Args:
        _image_path: Path to an image file that Pillow can read.
    """
    # Image.open keeps the underlying file open until the image is closed, so
    # use a context manager to release the handle once the image has been shown.
    with Image.open(_image_path) as image:
        image.show()

def image_to_tensor(_image_path: str, augment: bool = True) -> torch.Tensor:
    """Load an image file and return it as a single-image batch tensor.

    The image is converted to RGB, resized to 224x224 and turned into a float
    tensor. ``transforms.ToTensor()`` scales the pixel values from 0..255 down
    to the range 0..1.

    Args:
        _image_path: Path to an image file that Pillow can read.
        augment: When True (the default, matching the previous behavior) the
            image is randomly mirrored left-to-right and rotated by a random
            angle between -15 and +15 degrees. Pass False for images that must
            not be randomly altered, e.g. evaluation data.

    Returns:
        Tensor of shape ``[1, 3, 224, 224]``, i.e.
        ``[batch_size, channels, height, width]`` with the three RGB channels.

    Note:
        The random augmentation is applied once, at load time. The returned
        tensor does not change afterwards.
    """
    # Use a context manager so the file handle is released after decoding.
    # Converting to RGB guarantees exactly three channels, so grayscale,
    # palette or RGBA files still yield the documented shape and can be
    # concatenated with other samples.
    with Image.open(_image_path) as image:
        rgb_image = image.convert("RGB")

    steps = [transforms.Resize((224, 224))]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())  # random left/right mirror
        steps.append(transforms.RandomRotation(15))  # random angle in [-15, +15] degrees
    steps.append(transforms.ToTensor())

    _image_tensor = transforms.Compose(steps)(rgb_image)  # shape: [3, 224, 224]

    # Add a leading batch dimension, because models consume batches of samples
    # and this lets several images be concatenated along dim 0.
    return _image_tensor.unsqueeze(0)  # shape: [1, 3, 224, 224]

def show_image_by_tensor(_image_tensor: torch.Tensor) -> None:
    """Convert an image tensor back into a PIL image and display it.

    Args:
        _image_tensor: Image tensor, optionally with a leading batch dimension
            of size 1 (as returned by ``image_to_tensor``).
    """
    # Drop the batch dimension; squeeze(0) only removes dim 0 when its size is 1.
    unbatched = _image_tensor.squeeze(0)

    # Convert the tensor to a PIL image and hand it to the image viewer.
    to_pil = transforms.ToPILImage()
    to_pil(unbatched).show()


def get_label_for_image_path(_image_path: str) -> torch.Tensor:
    """Derive the class label of an image from the name of its parent directory.

    Args:
        _image_path: Path of the form ``.../<character_name>/<file>``. The name
            of the directory that directly contains the file is the class name.

    Returns:
        One-dimensional ``torch.long`` tensor holding a single value: the index
        of the directory name in ``all_labels`` (``0 .. len(all_labels) - 1``).
        Its length of 1 matches the batch dimension of a single image tensor.

    Raises:
        ValueError: If the directory name is not listed in ``all_labels``.
    """
    directory = os.path.basename(os.path.dirname(_image_path))
    try:
        label_idx = all_labels.index(directory)
    except ValueError:
        # list.index only reports "... is not in list"; name the offending
        # path and the known classes so a bad file is easy to track down.
        raise ValueError(
            f"Unknown label directory {directory!r} for image {_image_path!r}; "
            f"expected one of {all_labels}"
        ) from None
    return torch.tensor([label_idx], dtype=torch.long)

In [73]:
# One sample image per character class.
image_path_grampa, image_path_agnes = (
    "data/train/abraham_grampa_simpson/pic_0000.jpg",
    "data/train/agnes_skinner/pic_0000.jpg",
)

# Preview the first sample straight from disk.
show_image_by_path(image_path_grampa)

In [74]:
# Load both sample images as tensors (grampa first, then agnes).
image_tensor_grampa, image_tensor_agnes = (
    image_to_tensor(sample_path)
    for sample_path in (image_path_grampa, image_path_agnes)
)

In [75]:
# Build the matching label tensors, in the same order as the image tensors.
label_tensor_grampa, label_tensor_agnes = map(
    get_label_for_image_path,
    (image_path_grampa, image_path_agnes),
)

In [77]:
# Round-trip check: display the image reconstructed from its tensor.
show_image_by_tensor(_image_tensor=image_tensor_agnes)

# Creating the Dataset

In [78]:
# Join the single-sample tensors along dim 0 (torch.cat's default dimension),
# so that row i of the images lines up with row i of the labels.
image_tensor_combined = torch.cat((image_tensor_grampa, image_tensor_agnes))
label_tensor_combined = torch.cat((label_tensor_grampa, label_tensor_agnes))

In [79]:
# Wrap the stacked images and their labels in the custom dataset.
simpsons_dataset = SimpsonsImageDataset(
    tensor=image_tensor_combined,
    label=label_tensor_combined,
)

# Splitting into train and test set

In [80]:
# Use 80% of the samples for training and the remainder for testing.
total_size = len(simpsons_dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# Seed the split so that a fresh "Restart & Run All" assigns the same samples
# to the train and test subsets every time.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    simpsons_dataset,
    [train_size, test_size],
    generator=split_generator,
)

# Shuffle only the training data; keep the evaluation order fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)