# Step 1: Select Task/Dataset
I chose Tiny ImageNet, which contains 100000 images of 200 classes (500 for each class) resized to 64x64 color images.

In [1]:
from datasets import load_dataset
from classes import i2d
import json

In [2]:
dataset = load_dataset('Maysee/tiny-imagenet', split='train')

Found cached dataset parquet (/Users/haris.alic/.cache/huggingface/datasets/Maysee___parquet/Maysee--tiny-imagenet-2eb6c3acd8ebc62a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


In [3]:
dataset[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=64x64>,
 'label': 0}

In [4]:
with open("dataset_infos.json") as file:
    dataset_infos = json.load(file)

In [5]:
class_names = dataset_infos["Maysee--tiny-imagenet"]["features"]["label"]["names"]
idx2class = {i: class_names[i] for i in range(len(class_names))}

# Step 2: Get to know the data
The dataset is well balanced and has 500 images for each class

In [6]:
# from collections import defaultdict

# class_counts = defaultdict(int)
# for instance in dataset:
#     label = instance['label']
#     class_counts[label] += 1

# for label, count in class_counts.items():
#     print(f"Class {label}: {count} instances")


# Step 3: Structure Modeling

### Step 3.1 Determine how (with which metrics) you want to evaluate your model. Also, consider the error in estimating the metrics.
We will use accuracy and F1 macro (precision, recall) to evaluate our model.

### Step 3.2 Implement basic functionality to train models and evaluate them against each other. It is recommended to use a suitable MLOps platform (e.g. W&B)

In [7]:
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from tin import TinyImageNetDataset
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.metrics import precision_score, recall_score, f1_score

In [8]:
# Define a custom Dataset class because the dataset from load_dataset() is useless
train_data = TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode="train")
val_data = TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode="val")
test_data = TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode="test")

Preloading train data...:   0%|          | 0/100000 [00:00<?, ?it/s]

Preloading val data...:   0%|          | 0/10000 [00:00<?, ?it/s]

Preloading test data...:   0%|          | 0/10000 [00:00<?, ?it/s]

In [9]:
# reduce the size of train_data by x
train_data = torch.utils.data.Subset(train_data, range(0, len(train_data), 1))
print(f"train_data size: {len(train_data)}")
val_data = torch.utils.data.Subset(val_data, range(0, len(val_data), 1))
print(f"val_data size: {len(val_data)}")
test_data = torch.utils.data.Subset(test_data, range(0, len(test_data), 1))
print(f"test_data size: {len(test_data)}")

train_data size: 100000
val_data size: 10000
test_data size: 10000


In [10]:
BATCH_SIZE = 8
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True)
print(f"There are {len(train_loader)} batches in the training set")
print(f"There are {len(val_loader)} batches in the validation set")

There are 12500 batches in the training set
There are 1250 batches in the validation set


In [11]:
device = None
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: mps


In [12]:
class CNN_MLP(nn.Module):
    def __init__(self, num_classes=200, weight_init="random_uniform"):
        super(CNN_MLP, self).__init__()
        # Convolutional Layer that takes an input tensor with 3 channels
        # and outputs a tensor with 16 channels
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=16,
                               kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=16,
                               out_channels=32,
                               kernel_size=3)
        # Flattens the input tensor into a 1D tensor
        self.flatten = nn.Flatten()
        # Fully connected layers
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        # Weight Initialization
        self.initialize_weights(weight_init)
        if weight_init == "random_uniform":
            assert (self.conv1.weight >= 0).all() and (self.conv1.weight <= 1).all()
            assert (self.fc1.weight >= 0).all() and (self.fc1.weight <= 1).all()
        elif weight_init == "random_normal":
            assert torch.isclose(self.conv1.weight.mean(), torch.tensor(0.), atol=1e-2).item()
            assert torch.isclose(self.conv1.weight.std(), torch.tensor(1.), atol=1e-2).item()


    def forward(self, x):
        batch_size = x.shape[0]
        x = x.permute(0, 3, 1, 2)  # From (batch_size, H, W, C) to (batch_size, C, H, W)
        assert x.shape == (batch_size, 3, 64, 64)
        
        # # TODO understand -> using ReLU (non-saturating activation functions) to alleviate the vanishing gradients problem
        x = self.conv1(x)
        assert x.shape == (batch_size, 16, 62, 62)
        x = nn.ReLU()(x)
        # Applies max-pooling to reduce the spatial dimensions of the tensor
        x = nn.MaxPool2d(kernel_size=2)(x)
        assert x.shape == (batch_size, 16, 31, 31)

        x = self.conv2(x)
        assert x.shape == (batch_size, 32, 29, 29)
        x = nn.ReLU()(x)
        x = nn.MaxPool2d(kernel_size=2)(x)
        assert x.shape == (batch_size, 32, 14, 14)

        x = self.flatten(x)

        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)

        return x


    def initialize_weights(self, kind):
        if kind == "random_uniform":
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.uniform_(m.weight)
                    if m.bias is not None:
                        nn.init.uniform_(m.bias)
                elif isinstance(m, nn.Linear):
                    nn.init.uniform_(m.weight)
                    nn.init.uniform_(m.bias)
        elif kind == "random_normal":
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.normal_(m.weight)
                    if m.bias is not None:
                        nn.init.normal_(m.bias)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight)
                    nn.init.normal_(m.bias)
        elif kind == "xavier":
            pass
        elif kind == "he":
            pass
        else:
            raise ValueError("Invalid weight initialization kind!")

In [13]:
model = CNN_MLP(num_classes=200, weight_init="random_uniform")

optimizer = optim.SGD(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
print(model)

CNN_MLP(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=200, bias=True)
)


In [14]:
# %env WANDB_LOG_MODEL="end"
%env WANDB_SILENT=true
%env PYTORCH_ENABLE_MPS_FALLBACK=1

env: WANDB_SILENT=true
env: PYTORCH_ENABLE_MPS_FALLBACK=1


In [17]:
EPOCHS = 10
wandb.login()
wandb.init(project="del",
           entity="hariveliki")
wandb.define_metric("epoch")
wandb.define_metric("loss_train", step_metric="epoch")
wandb.define_metric("loss_eval", step_metric="epoch")
wandb.define_metric("accuracy", step_metric="epoch")
wandb.define_metric("f1_macro", step_metric="epoch")
wandb.define_metric("precision", step_metric="epoch")
wandb.define_metric("recall", step_metric="epoch")

# conv1_grads = []
# def save_grad(module, grad_input, grad_output):
#     conv1_grads.append(grad_output[0])
# hook_handle = model.conv1.register_full_backward_hook(save_grad)

model.to(device)
for epoch in range(EPOCHS):
    print(f"|---------------------------| Start Epoch {epoch + 1}: |---------------------------|")
    model.train()
    for n, batch in enumerate(train_loader):
        imgs = batch["image"]
        imgs = imgs.to(device)
        labels = batch["label"]
        labels = labels.to(device)

        # Forward pass
        labels = labels.long()
        logits = model(imgs)
        loss_train = criterion(logits, labels)

        # Backward pass
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
    print(f"Train Loss: {loss_train}")

    correct = 0
    total = 0
    loss_eval = 0
    all_labels = []
    all_outputs = []
    model.eval()
    for n, batch in enumerate(val_loader):
        imgs = batch["image"]
        imgs = imgs.to(device)
        labels = batch["label"]
        labels = labels.to(device)

        # Forward pass
        labels = labels.long()
        logits = model(imgs)
        preds = nn.functional.softmax(logits, dim=1)
        loss_eval += criterion(logits, labels).item()
        correct += (preds.argmax(1) == labels).sum().item()
        all_labels.extend(preds.argmax(1).cpu().detach().numpy().tolist())
        all_outputs.extend(labels.cpu().detach().numpy().tolist())

    loss_eval /= len(train_loader)
    correct /= len(val_data)
    accuracy = 100 * correct
    f1_macro = f1_score(all_labels, all_outputs, average='macro')
    precision = precision_score(all_labels, all_outputs, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_outputs, average='macro', zero_division=0)
    print(f"Eval Loss: {loss_eval}")
    print(f"Accuracy: {accuracy}")
    print(f"F1 Macro: {f1_macro}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    wandb.log(
        {"loss_train": loss_train,
         "epoch": epoch,
         "loss_eval": loss_eval,
         "accuracy": accuracy,
         "f1_macro": f1_macro,
         "precision": precision,
         "recall": recall}
    )

# hook_handle.remove()
wandb.finish()

|---------------------------| Start Epoch 1: |---------------------------|
Train Loss: 5.434091567993164
Eval Loss: 0.533295962600708
Accuracy: 0.5
F1 Macro: 4.975124378109453e-05
Precision: 0.005
Recall: 2.5e-05
|---------------------------| Start Epoch 2: |---------------------------|
Train Loss: 5.341231346130371
Eval Loss: 0.5332529539108276
Accuracy: 0.5
F1 Macro: 4.975124378109453e-05
Precision: 0.005
Recall: 2.5e-05
|---------------------------| Start Epoch 3: |---------------------------|
Train Loss: 5.221280574798584
Eval Loss: 0.5332105222702026
Accuracy: 0.5
F1 Macro: 4.975124378109453e-05
Precision: 0.005
Recall: 2.5e-05
|---------------------------| Start Epoch 4: |---------------------------|
Train Loss: 5.344844341278076
Eval Loss: 0.5331686963272094
Accuracy: 0.5
F1 Macro: 4.975124378109453e-05
Precision: 0.005
Recall: 2.5e-05
|---------------------------| Start Epoch 5: |---------------------------|
Train Loss: 5.310181617736816
Eval Loss: 0.5331276106262207
Accuracy: 



In [16]:
import matplotlib.pyplot as plt

def plot_conv_layer_gradients(batch_idx, conv_grads):
    """
    Plots the gradients of a convolutional layer for a given batch index.

    Args:
    - batch_idx (int): The index of the batch to plot the gradients for.
    - conv_grads (list): A list of the gradients of the convolutional layer.

    Returns:
    - None
    """
    # Get the gradient for that batch
    batch_grad = conv_grads[batch_idx]

    # Convert the PyTorch tensor to a NumPy array
    batch_grad_np = batch_grad.cpu().numpy()

    # Create a plot
    plt.figure(figsize=(10, 10))

    # Assuming the weight tensor is of shape (out_channels, in_channels, kernel_size, kernel_size)
    out_channels = batch_grad_np.shape[0]
    for i in range(out_channels):
        plt.subplot(4, 4, i + 1)
        plt.imshow(batch_grad_np[i, 0], cmap='viridis')
        plt.title(f'Output channel {i}')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# plot_conv_layer_gradients(0, conv1_grads)

| None                                                                                                                                                                                                                                                                                                                                                                              | Hyperparameter                 | SGD  | REG   | BN    | ADAM  | Weight Init    | Theorie Expectation | Actual | Conclusion |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------ | ---- | ----- | ----- | ----- | -------------- | ------------------- | ------ | ---------- |
| ![[Pasted image 20230926162413.png]] | Epochs: 5 <br/> LR: 1e-5 <br/> | True | False | False | False | Random Uniform | None                | None   | None       |
|                                                                                                                                                                                                                                                                                                                                                                                   |                                |      |       |       |       |                |                     |        |            |