# Hyperparameters

In [71]:
# Training configuration read by the cells further down.
EPOCHS = 20           # number of passes over the training loader
BATCH_SIZE = 64       # samples per batch for the train and validation loaders
LEARNING_RATE = 1e-2  # learning rate given to the SGD optimizer
SEED = 42             # value passed to torch.manual_seed before the model is built

# Step 1: Select Task/Dataset
I chose Tiny ImageNet. Its training split contains 100,000 images across 200 classes (500 per class), each resized to a 64x64 color image.

In [2]:
# from datasets import load_dataset
# from classes import i2d
# import json

In [3]:
# dataset = load_dataset('Maysee/tiny-imagenet', split='train')

In [4]:
# dataset[0]

In [5]:
# with open("dataset_infos.json") as file:
#     dataset_infos = json.load(file)

In [6]:
# class_names = dataset_infos["Maysee--tiny-imagenet"]["features"]["label"]["names"]
# idx2class = {i: class_names[i] for i in range(len(class_names))}

# Step 2: Get to know the data
The training set is perfectly balanced: every class has exactly 500 images.

In [7]:
# from collections import defaultdict

# class_counts = defaultdict(int)
# for instance in dataset:
#     label = instance['label']
#     class_counts[label] += 1

# for label, count in class_counts.items():
#     print(f"Class {label}: {count} instances")


# Step 3: Structure Modeling

### Step 3.1 Determine how (with which metrics) you want to evaluate your model. Also, consider the error in estimating the metrics.
We will evaluate the model with accuracy and the macro-averaged F1 score, reported together with macro-averaged precision and recall.

### Step 3.2 Implement basic functionality to train models and evaluate them against each other. It is recommended to use a suitable MLOps platform (e.g. W&B)

In [8]:
from tin import TinyImageNetDataset
from torch.utils.data import DataLoader
from torchvision import transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score

  from tqdm.autonotebook import tqdm


In [9]:
# The output of load_dataset() was not usable for this notebook, so each split
# is built with the custom TinyImageNetDataset class instead. The splits are
# constructed in the order train, val, test.
train_data, val_data, test_data = (
    TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode=split)
    for split in ("train", "val", "test")
)

Preloading train data...:   0%|          | 0/100000 [00:00<?, ?it/s]

Preloading val data...:   0%|          | 0/10000 [00:00<?, ?it/s]

Preloading test data...:   0%|          | 0/10000 [00:00<?, ?it/s]

In [10]:
import torch
# Keep every SUBSET_STEP-th sample of each split. 1 keeps the full split; use a
# larger value (e.g. 10) to iterate quickly on a fraction of the data.
# NOTE: this cell rebinds train_data / val_data / test_data, so running it more
# than once with SUBSET_STEP > 1 subsamples the already-reduced splits again.
SUBSET_STEP = 1
if SUBSET_STEP < 1:
    raise ValueError(f"SUBSET_STEP must be >= 1, got {SUBSET_STEP}")

train_data = torch.utils.data.Subset(train_data, range(0, len(train_data), SUBSET_STEP))
print(f"train_data size: {len(train_data)}")
val_data = torch.utils.data.Subset(val_data, range(0, len(val_data), SUBSET_STEP))
print(f"val_data size: {len(val_data)}")
test_data = torch.utils.data.Subset(test_data, range(0, len(test_data), SUBSET_STEP))
print(f"test_data size: {len(test_data)}")

train_data size: 100000
val_data size: 10000
test_data size: 10000


In [11]:
# Only the training batches are shuffled. The validation loss and accuracy are
# sums over the whole split and do not depend on sample order, so the validation
# loader iterates in a fixed order (this also avoids drawing from the global RNG
# on every evaluation pass).
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)
print(f"There are {len(train_loader)} batches in the training set")
print(f"There are {len(val_loader)} batches in the validation set")

There are 1563 batches in the training set
There are 157 batches in the validation set


In [12]:
# Pick the compute device in order of preference: first CUDA GPU, then Apple's
# MPS backend, and finally the CPU when neither accelerator is available.
device = torch.device(
    "cuda:0" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using device: {device}")

Using device: mps


## Step 3.2.1: Model

In [85]:
import torch.nn as nn
import utils
from typing import List

class CNN_MLP(nn.Module):
    """Configurable stack of conv / max-pool layers followed by a three-layer MLP head.

    The feature extractor is described by two parallel sequences: ``layers``
    holds one code per layer (``"C"`` adds ``nn.Conv2d`` followed by ``nn.ReLU``,
    ``"P"`` adds ``nn.MaxPool2d``) and ``confs`` holds the matching settings.
    After every convolution the channel count for the next convolution doubles.

    Args:
        dim: Side length of the (square) input images. It is forwarded to
            ``utils.get_dim_after_conv_and_pool`` to size the first linear layer.
        num_classes: Number of output logits.
        layers: Layer codes, each either ``"C"`` or ``"P"``.
        confs: One dict per entry of ``layers``. ``"C"`` entries are read with
            the keys ``"kernel"``, ``"stride"`` and ``"padding"``; ``"P"``
            entries with the key ``"kernel"``.
        in_channels: Number of channels of the input images.
        out_channels: Number of output channels of the first convolution.
        weight_init: Accepted for interface compatibility but not applied yet;
            all layers keep PyTorch's default initialization (see the
            commented-out ``initialize_weights`` sketch after this class).

    Raises:
        ValueError: If ``layers`` and ``confs`` differ in length or ``layers``
            contains a code other than ``"C"`` or ``"P"``.
    """

    def __init__(
            self,
            dim: int,
            num_classes: int,
            layers: list,
            confs: List[dict],
            in_channels: int,
            out_channels: int,
            weight_init=None):
        super().__init__()

        # zip() would silently drop the surplus entries, building a different
        # network than the one described, so reject mismatched inputs up front.
        if len(layers) != len(confs):
            raise ValueError(
                f"layers and confs must have the same length, "
                f"got {len(layers)} and {len(confs)}"
            )
        unknown_codes = [code for code in layers if code not in ("C", "P")]
        if unknown_codes:
            raise ValueError(
                f"Unknown layer code(s) {unknown_codes}; expected 'C' or 'P'"
            )

        self.net = nn.ModuleList()

        for layer, conf in zip(layers, confs):
            if layer == "C":
                self.net.append(
                    nn.Conv2d(
                        in_channels,
                        out_channels,
                        kernel_size=conf["kernel"],
                        stride=conf["stride"],
                        padding=conf["padding"]
                    )
                )
                self.net.append(nn.ReLU())
                # The next convolution consumes this one's output and doubles
                # the channel count again.
                in_channels = out_channels
                out_channels *= 2
            else:  # layer == "P"
                self.net.append(
                    nn.MaxPool2d(kernel_size=conf["kernel"])
                )

        self.flatten = nn.Flatten()
        # Channel count produced by the last convolution (or the input channel
        # count when there is no convolution at all).
        self.in_channels = in_channels
        # Spatial side length of the feature map that reaches the MLP head.
        self.dim = utils.get_dim_after_conv_and_pool(
            dim_init=dim,
            layers=layers,
            confs=confs
        )
        print(f"self.dim: {self.dim},\nself.in_channels: {self.in_channels}")
        self.fc1 = nn.Linear(self.dim * self.dim * self.in_channels, 784)
        self.fc2 = nn.Linear(784, 196)
        self.fc3 = nn.Linear(196, num_classes)
        # TODO: weight_init is not applied yet; custom weight initialization is
        # still only sketched in commented-out code.

    def forward(self, x):
        """Compute class logits for a batch of channels-last images.

        Args:
            x: Tensor laid out as (batch_size, H, W, C).

        Returns:
            Tensor of shape (batch_size, num_classes) with unnormalized logits.

        Raises:
            ValueError: If ``x`` is not 4-dimensional.
        """
        if x.dim() != 4:
            raise ValueError(
                f"Expected a 4-D input of shape (batch_size, H, W, C), "
                f"got {tuple(x.shape)}"
            )
        # nn.Conv2d expects channels first: (batch_size, H, W, C) -> (batch_size, C, H, W).
        x = x.permute(0, 3, 1, 2)

        for layer in self.net:
            x = layer(x)
        x = self.flatten(x)
        # Functional ReLU avoids constructing a new nn.ReLU module on every call.
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)

        return x


    # def initialize_weights(self, kind):
    #     if kind == "random_uniform":
    #         for m in self.modules():
    #             if isinstance(m, nn.Conv2d):
    #                 nn.init.uniform_(m.weight)
    #                 if m.bias is not None:
    #                     nn.init.uniform_(m.bias)
    #             elif isinstance(m, nn.Linear):
    #                 nn.init.uniform_(m.weight)
    #                 nn.init.uniform_(m.bias)
    #     elif kind == "random_normal":
    #         for m in self.modules():
    #             if isinstance(m, nn.Conv2d):
    #                 nn.init.normal_(m.weight)
    #                 if m.bias is not None:
    #                     nn.init.normal_(m.bias)
    #             elif isinstance(m, nn.Linear):
    #                 nn.init.normal_(m.weight)
    #                 nn.init.normal_(m.bias)
    #     elif kind == "xavier":
    #         pass
    #     elif kind == "he":
    #         pass
    #     else:
    #         raise ValueError("Invalid weight initialization kind!")

## Step 3.2.2: Model Init

In [86]:
from torchsummary import summary
import torch.optim as optim
# Seed PyTorch's global RNG before the model is constructed.
torch.manual_seed(SEED)

# Feature extractor written as (layer code, settings) pairs and then split into
# the two parallel lists CNN_MLP expects. "C" = convolution + ReLU, "P" = max-pool.
architecture = [
    ("C", {"kernel": 3, "stride": 1, "padding": 0}),
    ("P", {"kernel": 2}),
    ("C", {"kernel": 3, "stride": 1, "padding": 0}),
    ("P", {"kernel": 2}),
]
layers = [code for code, settings in architecture]
confs = [settings for code, settings in architecture]

model = CNN_MLP(dim=64, num_classes=200, layers=layers, confs=confs, in_channels=3, out_channels=16)
# Manual smoke test of the forward pass (disabled):
# x = torch.randn(5, 64, 64, 3)
# output = model(x)
# print(output.shape)

# Loss and optimizer used by the training cell.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

# Echo the run configuration and the resulting architecture.
print(f"Epochs: {EPOCHS}\nBatch size: {BATCH_SIZE}\nLearning rate: {LEARNING_RATE}\nSeed: {SEED}\n")
print(model)
summary(model)

self.dim: 14,
self.in_channels: 32
Epochs: 20
Batch size: 64
Learning rate: 0.01
Seed: 42

CNN_MLP(
  (net): ModuleList(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=6272, out_features=784, bias=True)
  (fc2): Linear(in_features=784, out_features=196, bias=True)
  (fc3): Linear(in_features=196, out_features=200, bias=True)
)
Layer (type:depth-idx)                   Param #
├─ModuleList: 1-1                        --
|    └─Conv2d: 2-1                       448
|    └─ReLU: 2-2                         --
|    └─MaxPool2d: 2-3                    --
|    └─Conv2d: 2-4                       4,640
|    └─ReLU: 2-5                         --
|    └

Layer (type:depth-idx)                   Param #
├─ModuleList: 1-1                        --
|    └─Conv2d: 2-1                       448
|    └─ReLU: 2-2                         --
|    └─MaxPool2d: 2-3                    --
|    └─Conv2d: 2-4                       4,640
|    └─ReLU: 2-5                         --
|    └─MaxPool2d: 2-6                    --
├─Flatten: 1-2                           --
├─Linear: 1-3                            4,918,032
├─Linear: 1-4                            153,860
├─Linear: 1-5                            39,400
Total params: 5,116,380
Trainable params: 5,116,380
Non-trainable params: 0

## Step 3.2.3: Model Train

In [87]:
import wandb
# %env WANDB_LOG_MODEL="end"
%env WANDB_SILENT=true
%env PYTORCH_ENABLE_MPS_FALLBACK=1
try:
    wandb.login()
    wandb.init(project="del",
               entity="hariveliki")
    wandb.define_metric("epoch")
    wandb.define_metric("loss_train", step_metric="epoch")
    wandb.define_metric("loss_eval", step_metric="epoch")
    wandb.define_metric("train_accuracy", step_metric="epoch")
    wandb.define_metric("eval_accuracy", step_metric="epoch")
    wandb.define_metric("f1_macro", step_metric="epoch")
    wandb.define_metric("precision", step_metric="epoch")
    wandb.define_metric("recall", step_metric="epoch")

    # conv1_grads = []
    # def save_grad(module, grad_input, grad_output):
    #     conv1_grads.append(grad_output[0])
    # hook_handle = model.conv1.register_full_backward_hook(save_grad)

    model.to(device)
    for epoch in range(1, EPOCHS+1):
        print(f"|---------------------------| Start Epoch {epoch}: |---------------------------|")
        loss_train = 0
        total = 0
        correct = 0
        model.train()
        for n, batch in enumerate(train_loader):
            imgs = batch["image"]
            imgs = imgs.to(device)
            labels = batch["label"]
            labels = labels.to(device)

            # Forward pass
            labels = labels.long()
            logits = model(imgs)
            preds = nn.functional.softmax(logits, dim=1)
            loss = criterion(logits, labels)
            loss_train += loss.item()
            predicted = preds.argmax(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        loss_train /= len(train_loader)
        train_accuracy = 100 * correct / total
        print(f"Train Loss: {loss_train}")
        print(f"Train Accuracy: {train_accuracy}")

        correct = 0
        total = 0
        loss_eval = 0
        # all_labels = []
        # all_outputs = []
        model.eval()
        for n, batch in enumerate(val_loader):
            imgs = batch["image"]
            imgs = imgs.to(device)
            labels = batch["label"]
            labels = labels.to(device)

            # Forward pass
            labels = labels.long()
            logits = model(imgs)
            preds = nn.functional.softmax(logits, dim=1)
            loss_eval += criterion(logits, labels).item()
            predicted = preds.argmax(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # correct += (preds.argmax(1) == labels).sum().item()
            # all_labels.extend(preds.argmax(1).cpu().detach().numpy().tolist())
            # all_outputs.extend(labels.cpu().detach().numpy().tolist())

        loss_eval /= len(val_loader)
        # correct /= len(val_loader)
        eval_accuracy = 100 * correct / total
        # f1_macro = f1_score(all_labels, all_outputs, average='macro')
        # precision = precision_score(all_labels, all_outputs, average='macro', zero_division=0)
        # recall = recall_score(all_labels, all_outputs, average='macro', zero_division=0)
        print(f"Eval Loss: {loss_eval}")
        print(f"Eval Accuracy: {eval_accuracy}")
        # print(f"F1 Macro: {f1_macro}")
        # print(f"Precision: {precision}")
        # print(f"Recall: {recall}")
        wandb.log(
            {
                "epoch": epoch,
                "loss_train": loss_train,
                "loss_eval": loss_eval,
                "train_accuracy": train_accuracy,
                "eval_accuracy": eval_accuracy
            }
        )

    # hook_handle.remove()
    wandb.finish()

except Exception as e:
    wandb.finish()
    raise e

except KeyboardInterrupt:
    wandb.finish()
    raise KeyboardInterrupt

env: WANDB_SILENT=true
env: PYTORCH_ENABLE_MPS_FALLBACK=1
|---------------------------| Start Epoch 1: |---------------------------|
Train Loss: 4.912283492561189
Train Accuracy: 4.325
Eval Loss: 4.558134874720483
Eval Accuracy: 7.35
|---------------------------| Start Epoch 2: |---------------------------|
Train Loss: 4.297701539096356
Train Accuracy: 10.89
Eval Loss: 4.37727343200878
Eval Accuracy: 9.3
|---------------------------| Start Epoch 3: |---------------------------|
Train Loss: 3.857043754421437
Train Accuracy: 17.445
Eval Loss: 4.20678383225848
Eval Accuracy: 13.13
|---------------------------| Start Epoch 4: |---------------------------|
Train Loss: 3.424139152790443
Train Accuracy: 24.037
Eval Loss: 4.309830746073632
Eval Accuracy: 13.23
|---------------------------| Start Epoch 5: |---------------------------|
Train Loss: 2.9049077271957544
Train Accuracy: 32.836
Eval Loss: 4.319305293878932
Eval Accuracy: 15.45
|---------------------------| Start Epoch 6: |------------



In [16]:
import matplotlib.pyplot as plt

def plot_conv_layer_gradients(batch_idx, conv_grads):
    """
    Plots the gradients of a convolutional layer for a given batch index.

    One subplot is drawn per entry along the first axis of the selected
    gradient, showing index 0 of its second axis. The grid is four columns
    wide with at least four rows and grows by whole rows when there are more
    than 16 entries.

    Args:
    - batch_idx (int): The index of the batch to plot the gradients for.
    - conv_grads (list): A list of the gradients of the convolutional layer.
      Each entry is indexed as ``grad[i, 0]`` and passed to ``imshow``, so it
      is presumably a 4-D tensor — TODO confirm the layout against the hook
      that records the gradients.

    Returns:
    - None
    """
    # Get the gradient for that batch
    batch_grad = conv_grads[batch_idx]

    # Convert the PyTorch tensor to a NumPy array. detach() makes this work
    # even when the stored tensor is still attached to an autograd graph.
    batch_grad_np = batch_grad.detach().cpu().numpy()

    # The code labels the first axis as output channels.
    out_channels = batch_grad_np.shape[0]

    # Four columns; never fewer than four rows so up to 16 plots keep the
    # original 4x4 layout, and extra rows are added for larger gradients.
    n_cols = 4
    n_rows = max(4, (out_channels + n_cols - 1) // n_cols)

    # Create a plot
    plt.figure(figsize=(10, 10))
    for i in range(out_channels):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(batch_grad_np[i, 0], cmap='viridis')
        plt.title(f'Output channel {i}')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# plot_conv_layer_gradients(0, conv1_grads)