# CNNs: Building a CNN from scratch

In [None]:
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm, trange
import torchvision.transforms as transforms
import time

In [None]:
!pip install pytorch-ignite

In [None]:
from ignite.handlers import FastaiLRFinder
from ignite.engine import create_supervised_trainer

## Inspect the Flowers102 dataset

In [None]:
# Open the training split of Flowers102 stored under data/, downloading it
# first if requested files are not there yet (download=True).
dataset = torchvision.datasets.Flowers102(
    root='data/',
    split='train',
    download=True,
)

In [None]:
# Display the first sample. The plotting cell below uses element 0 of a sample
# as the image and element 1 as the class label.
dataset[0]

In [None]:
# Show the first 25 samples in a 5x5 grid, each titled with its class label.
fig, ax = plt.subplots(5, 5, figsize=(10, 10))
# ax.flat walks the 5x5 axes array row by row, i.e. in the same order as
# indexing it with [i // 5, i % 5].
for i, axis in enumerate(ax.flat):
    # Fetch the sample once and unpack it instead of indexing the dataset
    # separately for the image and for the label.
    img, label = dataset[i]
    axis.imshow(img)
    axis.axis('off')
    axis.set_title(f'Class: {label}')
plt.show()

In [None]:
# Count how many samples of the dataset carry each class label.
# Counter is a dict subclass, so label_count can still be used like the plain
# dict it replaces (keys are labels, values are sample counts).
from collections import Counter

label_count = Counter(label for _, label in dataset)

In [None]:
# label_count has one entry per distinct label, so its size is the number of
# classes found in the dataset.
print('Number of classes')
len(label_count)

## Image classification with AlexNet

In order to get a better understanding of how CNNs work, we will build and train a neural network from scratch. As the base architecture, we will use AlexNet.

Introduced in 2012, AlexNet stands as a cornerstone in deep learning, ushering in a new era in image recognition with its convolutional neural network (CNN) architecture. Although there are superior architectures nowadays, AlexNet is easy to implement, which makes it a good starting point for understanding how CNNs work.

In [None]:
# Use the GPU when CUDA is available and fall back to the CPU otherwise, so
# the notebook also runs on machines without a CUDA device.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Mini-batch size (presumably for the dataloaders prepared later in the
# notebook; they are still TODO here).
BATCH_SIZE = 64

### Helper functions

In [None]:
def plot_accuracy(stats, title=None):
    """Plot the accuracy curves recorded during training.

    Args:
        stats: List of dicts as returned by ``train``. Every entry holds an
            ``'epoch'`` and a ``'train_acc'`` value; ``'val_acc'`` is only
            present when training ran with a validation loader.
        title: Optional title for the plot.
    """
    epochs = [item['epoch'] for item in stats]
    train_accs = [item['train_acc'] for item in stats]
    plt.plot(epochs, train_accs, label='Train Acc')

    # ``train`` omits 'val_acc' when it is called without a validation loader,
    # so only draw the validation curve from entries that actually have one.
    # This also avoids calling max() on an empty sequence.
    val_points = [
        (item['epoch'], item['val_acc']) for item in stats if 'val_acc' in item
    ]
    if val_points:
        val_epochs, val_accs = zip(*val_points)
        plt.plot(val_epochs, val_accs, label=f'Val Acc [Best: {max(val_accs):.2f}%]')

    plt.legend()

    if title:
        plt.title(title)

In [None]:
def train(model, train_loader, optimizer, criterion, num_epochs, val_loader=None):
    """Train the model on the dataloader for a given number of epochs.

    After every fifth epoch the accuracy on the training data (and on the
    validation data, if ``val_loader`` is given) is measured with
    ``eval_accuracy``, printed and recorded.

    Args:
        model: Model to optimize; called on each input batch.
        train_loader: Iterable yielding ``(x, y)`` training batches.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        criterion: Loss function, called as ``criterion(y_pred, y)``.
        num_epochs: Number of passes over ``train_loader``.
        val_loader: Optional loader yielding validation batches.

    Returns:
        A list with one dict per evaluated epoch. Each dict holds ``'epoch'``
        and ``'train_acc'`` and, if a validation loader was given,
        ``'val_acc'``.
    """
    print('===== Start training ===== \n')

    start = time.time()
    stats = []

    for epoch in range(1, num_epochs + 1):
        # eval_accuracy() calls model.eval(). Re-enable training mode at the
        # start of every epoch so the epochs following an evaluation do not
        # run with the model still in eval mode.
        model.train()

        # Sum (not mean) of the per-batch losses of this epoch.
        epoch_loss = 0

        for x, y in train_loader:
            optimizer.zero_grad()

            x = x.to(DEVICE)
            y = y.to(DEVICE)

            y_pred = model(x)
            loss = criterion(y_pred, y)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f'[{epoch}] Loss: {epoch_loss:.3f}')

        # Measuring accuracy needs a full pass over the data, so it is only
        # done every fifth epoch.
        if epoch % 5 == 0:
            train_acc = eval_accuracy(model, train_loader)
            # Compare against None explicitly: the truth value of a loader
            # depends on its length, which is not what is being asked here.
            if val_loader is not None:
                val_acc = eval_accuracy(model, val_loader)
                print(f'[{epoch}] Train Acc: {train_acc:.2f}%  /  Val Acc: {val_acc:.2f}%')
                stats.append({'epoch': epoch, 'train_acc': train_acc, 'val_acc': val_acc})
            else:
                print(f'[{epoch}] Train Acc: {train_acc:.2f}%')
                stats.append({'epoch': epoch, 'train_acc': train_acc})

    end = time.time()
    elapsed_time = end - start

    print()
    print('===== Finished training ===== ')
    print(f'Elapsed time in minutes: {elapsed_time/60:.2f}')

    return stats

In [None]:
def eval_accuracy(model, loader):
    """Measure the accuracy of the given model on the provided dataloader.

    The model is switched to eval mode for the measurement and put back into
    the mode it was in before the call, so evaluating in the middle of a
    training run does not leave the model in eval mode.

    Args:
        model: Model to evaluate; called on each input batch.
        loader: Iterable yielding ``(x, y)`` batches.

    Returns:
        The share of correctly predicted samples in percent, as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    # Remember the current mode so it can be restored afterwards.
    was_training = model.training
    model.eval()

    num_corr_pred = 0
    num_total_pred = 0

    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for x, y in loader:
                x = x.to(DEVICE)
                y = y.to(DEVICE)

                y_pred = model(x)

                # Predicted class = index of the largest value along dim 1.
                top_pred = y_pred.argmax(1)
                num_corr_pred += (top_pred == y).sum()
                num_total_pred += len(y)
    finally:
        # Restore the previous mode even if the evaluation fails.
        model.train(was_training)

    acc = num_corr_pred / num_total_pred * 100

    return acc.item()

### Implementing AlexNet from scratch

AlexNet comprises five convolutional layers. These layers are responsible for extracting various features from the input images. For the following explanation we assume an input image of size 224x224.

**Feature Extractor:**
- Convolution [F=96, K=11x11, S=4, P=2] => 96x55x55
- Max Pool [K=3x3, S=2, P=0] => 96x27x27
- Convolution [F=192, K=5x5, S=1, P=2] => 192x27x27
- Max Pool [K=3x3, S=2, P=0] => 192x13x13
- Convolution [F=384, K=3x3, S=1, P=1] => 384x13x13
- Convolution [F=256, K=3x3, S=1, P=1] => 256x13x13
- Convolution [F=256, K=3x3, S=1, P=1] => 256x13x13
- Max Pool [K=3x3, S=2, P=0] => 256x6x6

**Classification Head:**
- Linear Layer [256*6*6=9216,4096]
- Linear Layer [4096,4096]
- Linear Layer [4096,102]

**Hint:**

Formula to calculate the output size of a convolutional layer:

$O = \lfloor\frac{I-K+2P}{S}\rfloor +1$

Formula to calculate the output size of a pooling layer:

$O = \lfloor\frac{I-K+2P}{S}\rfloor +1$


In [None]:
# TODO Build AlexNet

In [None]:
# TODO: Feed a tensor through the network to test the model

### Prepare the train, val and test set

In [None]:
# TODO: Prepare the transforms (No augmentation) + Train/Val/Test dataset

In [None]:
# TODO: Prepare the dataloaders

### Learning rate range test to estimate a good initial learning rate

In [None]:
# Learning rate range test: let the finder run the trainer for 200 iterations
# with learning rates between start_lr and end_lr, then inspect the results.
start_lr = 1e-7
end_lr = 1e+1

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
trainer = create_supervised_trainer(model, optimizer, criterion, device=DEVICE)

# Objects handed to the finder together with the trainer (per the ignite
# documentation, their state is restored once the run is over).
to_save = {"model": model, "optimizer": optimizer}
lr_finder = FastaiLRFinder()

with lr_finder.attach(
    trainer,
    to_save=to_save,
    start_lr=start_lr,
    end_lr=end_lr,
    num_iter=200,
) as trainer_with_lr_finder:
    trainer_with_lr_finder.run(train_loader)

# Results recorded by the finder during the run
lr_finder.get_results()

# Plot the recorded results (requires matplotlib)
lr_finder.plot()

# Learning rate suggested by the finder
lr_finder.lr_suggestion()

### Train the model (without augmentation)

In [None]:
# TODO: Launch training

### Plot accuracy

In [None]:
# Plot the accuracy curves of the run without augmentation.
# NOTE(review): `stats` is not defined in the visible cells; presumably it is
# the return value of train() from the training cell above (still a TODO).
plot_accuracy(stats, title='AlexNet from scratch [No augmentation]')

In [None]:
# Accuracy (in percent) of the model on the test loader.
# NOTE(review): `test_loader` is expected to come from the dataloader cell
# above, which is still a TODO.
test_acc = eval_accuracy(model, test_loader)
print(f'Accuracy on the test set (final model): {test_acc:.2f}%')

## Training AlexNet with augmentation

### Enable augmentation

In [None]:
# TODO: Modify the train transform to use augmentations

### Training the model

In [None]:
# TODO: Train the model

### Plot accuracy

In [None]:
# Plot the accuracy curves of the run with augmentation.
# NOTE(review): `stats` is presumably the return value of train() from the
# training cell above (still a TODO); otherwise it still holds the values of
# an earlier run.
plot_accuracy(stats, title='AlexNet from scratch [With augmentation]')

In [None]:
# Accuracy (in percent) on the test loader of the model held in `model` at
# this point, presumably the one trained with augmentation above.
test_acc = eval_accuracy(model, test_loader)
print(f'Accuracy on the test set (final model): {test_acc:.2f}%')