In [None]:
!pip install wandb

In [None]:
!wandb login API-KEY

In [1]:
import argparse
import os
import os.path

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader, random_split

import wandb
from shared_methods import all_labels
from simpsons_neural_network_2 import SimpsonsNet2

# Check if we can use Cuda

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

# device = "cpu" # uncomment if you want to use "cpu", currently cpu is faster than cuda (maybe because the NN is very little)
print(f"Using {device} device")

Using cuda device


# Initialize wandb

In [3]:
config = argparse.Namespace()
config.learning_rate = 0.01
config.epochs = 30
config.batch_size = 32

# Creating a custom Dataset Class

In [4]:
class SimpsonsImageDataset(Dataset):
    def __init__(self, tensor, label):
        self.tensor = tensor
        self.label = label

    def __len__(self):
        return len(self.tensor)

    def __getitem__(self, index):
        return self.tensor[index], self.label[index]

# Loading an image and creating a label

In [5]:
# Functions for image handling

def image_to_tensor(_image_path: str) -> torch.Tensor:
    image = Image.open(_image_path)

    transform = transforms.Compose([transforms.Resize((224, 224)),
                                    transforms.ToTensor()])
    _image_tensor = transform(image)  # image_tensor now has a shape of torch.Size([3, 224, 224])

    # RandomHorizontalFlip
    # --> randomly mirror the image from the left to right
    # RandomRotation
    # --> rotate the image by a random angle within a given range, in this case from -15 to +15 degrees
    # when we omitted those steps, we received better results
    # compare:
    # https://wandb.ai/hamm-daniel/kaggle-simpsons/runs/1c2ehxl7?workspace=user-hamm-daniel (preprocessing)
    # https://wandb.ai/hamm-daniel/kaggle-simpsons/runs/cxjx9df9/workspace?workspace=user-hamm-daniel (no preprocessing)

    # we add a batch dimension since most neural network frameworks expect input in the form of batches
    # the batch dimension helps in parallel processing and is essential for training the model with
    # multiple samples
    # _image_tensor = _image_tensor.unsqueeze(0)  # image_tensor now has a shape of torch.Size([1, 3, 224, 224])
    # --> this is not necessary if we use a Dataloader, which adds the batch dimension automatically

    # image_tensor now has these dimensions: [batch_size, channels, height, width]
    # the channel dimension refers to the different color layers that make up an image. Usually, we have 3 channels: RGB
    # by using transforms.ToTensor(), we automatically normalize the pixel values to a range between 0 and 1 (instead of 0 to 255).
    # it is important to understand each value in the multidimensional array is between 0 and 1 now

    return _image_tensor.to(device)

def show_image_by_tensor(_image_tensor: torch.Tensor) -> None:
    _image_tensor = _image_tensor.squeeze(0)  # remove the batch dimension
    transform = transforms.Compose([transforms.ToPILImage()])

    # convert tensor to PIL image
    image_pil = transform(_image_tensor)

    # display the image
    image_pil.show()


def get_label_for_image_path(_image_path: str) -> torch.Tensor:
    # here we are returning a tensor with just one dimension - it is equal to the size of the batch dimension of a single image
    # depending on the image_path, a label tensor with value between 0 and 19 is created (since we have 20 different characters)
    directory = os.path.basename(os.path.dirname(_image_path))
    label_idx = all_labels.index(directory)
    return torch.tensor(label_idx, dtype=torch.long).to(device)

def get_character_for_label(_label_tensor: str) -> str:
    return all_labels[_label_tensor[0]]


In [6]:
image_tensors = []
label_tensors = []

root_dir = "data/train"

for character in os.listdir(root_dir):
    char_dir = os.path.join(root_dir, character)

    # check if it's a folder
    if os.path.isdir(char_dir):
        print(f"Processing folder {char_dir}")

        # iterate through all the files
        for filename in os.listdir(char_dir):
            if filename.endswith(".jpg"):
                img_path = os.path.join(char_dir, filename)

                # load the tensor and the label for the image
                image_tensor = image_to_tensor(img_path)
                label_tensor = get_label_for_image_path(img_path)

                # append to list
                image_tensors.append(image_tensor)
                label_tensors.append(label_tensor)

print(f"Finished processing, got {len(image_tensors)} image tensors and {len(label_tensors)} label tensors")

Processing folder data/train\abraham_grampa_simpson
Processing folder data/train\agnes_skinner
Processing folder data/train\apu_nahasapeemapetilon
Processing folder data/train\barney_gumble
Processing folder data/train\bart_simpson
Processing folder data/train\carl_carlson
Processing folder data/train\charles_montgomery_burns
Processing folder data/train\chief_wiggum
Processing folder data/train\cletus_spuckler
Processing folder data/train\comic_book_guy
Processing folder data/train\disco_stu
Processing folder data/train\edna_krabappel
Processing folder data/train\fat_tony
Processing folder data/train\gil
Processing folder data/train\groundskeeper_willie
Processing folder data/train\homer_simpson
Processing folder data/train\kent_brockman
Processing folder data/train\krusty_the_clown
Processing folder data/train\lenny_leonard
Processing folder data/train\lionel_hutz
Processing folder data/train\lisa_simpson
Processing folder data/train\maggie_simpson
Processing folder data/train\marge_

# Creating the Dataset

In [7]:
image_tensor_combined = torch.stack(image_tensors)
label_tensor_combined = torch.stack(label_tensors)

In [8]:
simpsons_dataset = SimpsonsImageDataset(image_tensor_combined, label_tensor_combined)

# Splitting into train and test set

In [9]:
total_size = len(simpsons_dataset)
train_size = int(0.8 * total_size)
validation_size = total_size - train_size

train_dataset, validation_dataset = random_split(simpsons_dataset, [train_size, validation_size])

train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=config.batch_size, shuffle=False)
print(f"I am using {len(train_dataset)} images for training and {len(validation_dataset)} images for validation")

I am using 13620 images for training and 3406 images for validation


# Neural Network Architecture, loss function and optimizer

In [10]:
# create a complete CNN
model = SimpsonsNet2()
config.model = model.__class__
print(model)

model.to(device)

# loss function
loss_function = nn.CrossEntropyLoss()
# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)
# adding a scheduler to reduce the learning_rate as soon as the validation loss stops decreasing.
# this is to try to prevent overfitting of the model
scheduler = ReduceLROnPlateau(optimizer, 'min')  # 'min' means reducing the LR when the metric stops decreasing


SimpsonsNet2(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=25088, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=29, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


# Train the Network

In [11]:
wandb.init(project="kaggle-simpsons", config=vars(config))

[34m[1mwandb[0m: Currently logged in as: [33mhamm-daniel[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
wandb.watch(model)

best_val_loss = float('inf')

for epoch in range(1, config.epochs + 1):
    # Training
    model.train()
    train_loss = 0.0

    batch_number = 0
    for images, labels in train_loader:
        if batch_number % 50 == 0:
            print(f"batch {batch_number} from {len(train_loader)} ...")
        batch_number += 1

        optimizer.zero_grad()
        output = model(images)  # output shape: torch.Size([32, 29])
        # the first dimension has a size of 32 due to our batch size (changes with different batch sizes)
        # second dimension is 29 because we have 29 output labels

        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in validation_loader:
            output = model(images)
            loss = loss_function(output, labels)
            val_loss += loss.item()

    # step the scheduler - adjust the learning rate if validation loss stops decresing
    scheduler.step(val_loss)

    print(f"Epoch {epoch}, Train Loss: {train_loss/len(train_loader)}, Validation Loss: {val_loss/len(validation_loader)}")
    wandb.log({'epoch': epoch, 'training loss': train_loss, 'validation loss': val_loss, 'adjusted learning rate': optimizer.param_groups[0]['lr']})

    # Save model if validation loss has decreased
    if val_loss < best_val_loss:
        torch.save(model.state_dict(), "best_model.pth")
        best_val_loss = val_loss


batch 0 from 426 ...
batch 50 from 426 ...
batch 100 from 426 ...
batch 150 from 426 ...
batch 200 from 426 ...
batch 250 from 426 ...
batch 300 from 426 ...
batch 350 from 426 ...
batch 400 from 426 ...
Epoch 1, Train Loss: 2.1559062594539125, Validation Loss: 1.6470178287720012
batch 0 from 426 ...
batch 50 from 426 ...
batch 100 from 426 ...
batch 150 from 426 ...
batch 200 from 426 ...
batch 250 from 426 ...
batch 300 from 426 ...
batch 350 from 426 ...
batch 400 from 426 ...
Epoch 2, Train Loss: 1.4071181562025223, Validation Loss: 1.4765519962132534
batch 0 from 426 ...
batch 50 from 426 ...
batch 100 from 426 ...
batch 150 from 426 ...
batch 200 from 426 ...
batch 250 from 426 ...
batch 300 from 426 ...
batch 350 from 426 ...
batch 400 from 426 ...
Epoch 3, Train Loss: 1.137740563618745, Validation Loss: 1.14734801287963
batch 0 from 426 ...
batch 50 from 426 ...
batch 100 from 426 ...
batch 150 from 426 ...
batch 200 from 426 ...
batch 250 from 426 ...
batch 300 from 426 ...
ba

In [13]:
  wandb.finish()

0,1
adjusted learning rate,███████████████████████▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
training loss,█▆▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation loss,█▇▄▃▄▂▂▃▁▁▂▃▁▁▂▂▁▂▁▁▂▂▁▂▁▁▁▁▁▁

0,1
adjusted learning rate,0.001
epoch,30.0
training loss,3.95125
validation loss,73.20173
