# Step 1: Select Task/Dataset
I chose Tiny ImageNet, which contains 100000 images of 200 classes (500 for each class) resized to 64x64 color images.

In [1]:
from datasets import load_dataset
from classes import i2d
import json

In [2]:
dataset = load_dataset('Maysee/tiny-imagenet', split='train')

Found cached dataset parquet (/Users/haris.alic/.cache/huggingface/datasets/Maysee___parquet/Maysee--tiny-imagenet-2eb6c3acd8ebc62a/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


In [3]:
dataset[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=64x64>,
 'label': 0}

In [4]:
with open("dataset_infos.json") as file:
    dataset_infos = json.load(file)

In [5]:
class_names = dataset_infos["Maysee--tiny-imagenet"]["features"]["label"]["names"]
idx2class = {i: class_names[i] for i in range(len(class_names))}

# Step 2: Get to know the data
The dataset is well balanced and has 500 images for each class

In [6]:
# from collections import defaultdict

# class_counts = defaultdict(int)
# for instance in dataset:
#     label = instance['label']
#     class_counts[label] += 1

# for label, count in class_counts.items():
#     print(f"Class {label}: {count} instances")


# Step 3: Structure Modeling

### Step 3.1 Determine how (with which metrics) you want to evaluate your model. Also, consider the error in estimating the metrics.
We will use accuracy, precision, recall, and F1 score (macro) to evaluate our model.

### Step 3.2 Implement basic functionality to train models and evaluate them against each other. It is recommended to use a suitable MLOps platform (e.g. W&B)

In [7]:
%env WANDB_LOG_MODEL="end"
%env WANDB_SILENT=true
%env PYTORCH_ENABLE_MPS_FALLBACK=1

env: WANDB_LOG_MODEL="end"
env: WANDB_SILENT=true
env: PYTORCH_ENABLE_MPS_FALLBACK=1


In [8]:
# checkpoint = "mc1-cnn-mlp"
# wandb_group = checkpoint.split("-")[0]
# wandb_name = "-".join(checkpoint.split("-")[1:])
# print(f"wandb_group: {wandb_group}")
# print(f"wandb_name: {wandb_name}")

In [9]:
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from tin import TinyImageNetDataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [10]:
# Define a custom Dataset class because the dataset from load_dataset() is useless
train_data = TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode="train")
val_data = TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode="val")
test_data = TinyImageNetDataset(root_dir="./data/tiny-imagenet-200", mode="test")

Preloading train data...:   0%|          | 0/100000 [00:00<?, ?it/s]

Preloading val data...:   0%|          | 0/10000 [00:00<?, ?it/s]

Preloading test data...:   0%|          | 0/10000 [00:00<?, ?it/s]

In [11]:
# reduce the size of train_data by x
train_data = torch.utils.data.Subset(train_data, range(0, len(train_data), 500))
print(f"train_data size: {len(train_data)}")
val_data = torch.utils.data.Subset(val_data, range(0, len(val_data), 100))
print(f"val_data size: {len(val_data)}")
test_data = torch.utils.data.Subset(test_data, range(0, len(test_data), 100))
print(f"test_data size: {len(test_data)}")

train_data size: 200
val_data size: 100
test_data size: 100


In [12]:
batch_size = 1
trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
print(f"There are {len(trainloader)} batches in the training set")

There are 200 batches in the training set


In [13]:
device = None
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: mps


In [14]:
class CNN_MLP(nn.Module):
    def __init__(self, num_classes=200):
        super(CNN_MLP, self).__init__()
        # Convolutional Layer that takes an input tensor with 3 channels
        # and outputs a tensor with 16 channels
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=16,
                               kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=16,
                               out_channels=32,
                               kernel_size=3)
        # Flattens the input tensor into a 1D tensor
        self.flatten = nn.Flatten()
        # Fully connected layers
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
    
    def forward(self, x):
        batch_size = x.shape[0]
        x = x.permute(0, 3, 1, 2)  # From (batch_size, H, W, C) to (batch_size, C, H, W)
        assert x.shape == (batch_size, 3, 64, 64)
        
        # # TODO understand -> using ReLU (non-saturating activation functions) to alleviate the vanishing gradients problem
        x = self.conv1(x)
        assert x.shape == (batch_size, 16, 62, 62)
        x = nn.ReLU()(x)
        # Applies max-pooling to reduce the spatial dimensions of the tensor
        x = nn.MaxPool2d(kernel_size=2)(x)
        assert x.shape == (batch_size, 16, 31, 31)

        x = self.conv2(x)
        assert x.shape == (batch_size, 32, 29, 29)
        x = nn.ReLU()(x)
        x = nn.MaxPool2d(kernel_size=2)(x)
        assert x.shape == (batch_size, 32, 14, 14)

        x = self.flatten(x)

        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)

        return x

In [15]:
model = CNN_MLP()
optimizer = optim.SGD(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
print(model)
# model.to(device)

CNN_MLP(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=200, bias=True)
)


In [18]:
EPOCHS = 5
wandb.login()
wandb.init(project="del",
           config={"epochs": EPOCHS})

for epoch in range(EPOCHS):
    for n, batch in enumerate(trainloader):
        imgs = batch["image"]
        assert imgs.shape == (batch_size, 64, 64, 3)
        labels = batch["label"]
        assert labels.shape == (batch_size,)

        # Forward pass
        labels = labels.long()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"loss: {loss}")
    wandb.log({"loss": loss})

wandb.finish()

loss: 0.00040534863364882767
loss: 3.0874729418428615e-05
loss: 0.049919359385967255
loss: 1.6760648488998413
loss: 1.105322241783142


