# Examples and Calculations with Convolutions


In [None]:
import random
import torch # Pytorch is one of the most used machine learning frameworks in academia
import torch.nn as nn # nn contains the neural network functions of pytorch
import torchvision # For Image Processing
from torchvision import datasets, models, transforms
import os # load data
import numpy as np # work with matrices
from PIL import Image # Pillow is a commonly used Image Processing Library 
import matplotlib.pyplot as plt #for visualization


# Seed the PyTorch, Python and NumPy random number generators with the same
# fixed value so that repeated runs of the notebook start from the same state.
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)

# Loss Functions

During training we need loss (error) functions that quantify how well our network is currently performing. We use backpropagation with gradient descent to minimize the loss and thereby improve the performance of our network.


The most commonly used loss functions are Mean Squared Error (MSE) and cross-entropy.

- MSE is used for regression tasks (e.g. how big is a bounding box)
- Cross-entropy is used for classification (e.g. which class is inside the bounding box)




## MSE
$L_\text{MSE}=\frac{1}{n}\sum_{i=1}^n(\text{prediction}_i-\text{label}_i)^2$

Let's say we have Prediction

[2 3 4]

and label

[2 4  1]

The MSE is calculated as 

$\frac{(2-2)^2 + (3-4)^2+(4-1)^2}{3}$
= $\frac{0^2 + (-1)^2+3^2}{3} = \frac{10}{3} $

In [None]:
# Worked MSE example: the criterion averages the squared element-wise
# differences, here (0^2 + (-1)^2 + 3^2) / 3 = 10/3.
mse_criterion = nn.MSELoss()
prediction = torch.tensor([2.0, 3.0, 4.0])
label = torch.tensor([2.0, 4.0, 1.0])
mse_loss = mse_criterion(input=prediction, target=label)
mse_loss

## Crossentropy Loss
$L_\text{CE}=-\sum_{i=1}^n\text{label}_i \cdot \text{log} (\text{prediction}_i)$

Softmax is always applied to the raw prediction first, so that its entries are positive and sum to 1:

$\text{softmax}(z_i) = \frac{e^{z_i}}{\sum_{k} e^{z_k}}$


E.g. we have $ \text{prediction} = [4 \quad 2 \quad 1]$ and $\text{label} = [1 \quad 0 \quad 0]$

Then Softmax calculates

$[ \frac{e^4}{e^4+e^2+e^1} \quad \frac{e^2}{e^4+e^2+e^1}  \quad \frac{e^1}{e^4+e^2+e^1}]$

$[0.844 \quad  0.114 \quad 0.042]$



In [None]:
# Softmax over dim=1, i.e. across the class scores of each row (sample).
sm = nn.Softmax(dim=1)
# One sample with three class scores; the outer list is the batch dimension.
prediction = torch.tensor([[4.0, 2.0, 1.0],])

softmaxed_pred = sm(prediction)
# The message now names the values that are actually fed in ([4 2 1]).
print("softmax of [4 2 1] is ", softmaxed_pred)

And the crossentropy loss is

$L_\text{CE}= -(1 \cdot \text{log}(0.844) + 0 \cdot \text{log}(0.114) + 0 \cdot \text{log}(0.042)) = -\text{log}(0.844) \approx 0.170$


In [None]:
# The criterion applies softmax itself, so it is given the raw prediction
# rather than the softmaxed values.
ce_criterion = nn.CrossEntropyLoss()
# One-hot target for the single sample: class 0 is the correct class.
label = torch.tensor([[1.0, 0.0, 0.0]])
ce_loss = ce_criterion(input=prediction, target=label)
print("Crossentropy loss is", ce_loss)

# **Parameters**

Let's say we want to classify images into 5 possible classes.
The images have size $32\times32$ and are in RGB.
For this classification we want to use two convolutional layers and one fully connected layer.
- The first convolutional layer has 16 filters, stride 2, a $5\times5$ kernel and no padding
- The second convolutional layer has 32 filters, stride 1, a $3\times3$ kernel
and padding to keep the same feature map resolution

Calculate the number of trainable parameters!

## Calculate parameters of convolutional Layers
The number of parameters per convolutional layer (kernel weights plus one bias per filter) can be calculated as follows:

$\text{number parameters} =\text{numberFilters} \times 
(\text{kernelWidth} \times \text{kernelHeight} \times \text{inputChannels} + 1)$

For the first convolutional layer we get:

$16 \cdot (5 \cdot 5 \cdot 3 + 1) = 1216 \text{ Parameters}$ 

For the second convolutional layer we get:

$32 \cdot  (3 \cdot 3 \cdot 16 + 1) = 4640 \text{ Parameters}$


## Calculate parameters of fully connected layer

The number of parameters in convolutional layers is independent of the input  resolution. But for the fully connected layer we need to know the resolution of the incoming feature map.

We can calculate the resolution after a convolution with:

$\text{new resolution} = \left\lfloor\frac{\text{prev feature map size} - \text{kernel size}}{\text{stride}} + 1\right\rfloor$

Dimension of first featuremap (original image):

 $\quad 32\times32\times3$

Resolution after first convolutional layer:

$\quad ⌊\frac{32-5}{2}+1⌋ = ⌊14.5⌋ = 14$

Dimension of feature map after first convolutional layer:

$ \quad 14 \times 14 \times 16$

Second convolution uses padding to keep the resolution identical but increases number of filters. So the dimension after the second convolutional layer is:

$ \quad 14 \times 14 \times 32$

Now we can calculate the number of trainable parameters of the fully connected layer:

$\text{number parameters} = (\text{input features} + 1) \times \text{number neurons} $

which is:

$\quad (14 \cdot 14 \cdot 32 + 1) \cdot 5 = 31365$


So we have

1216 + 4640 + 31365 = 37221 parameters.





In [None]:
# Small CNN for 32x32 RGB inputs and 5 classes:
#   conv 5x5, stride 2, 16 filters     -> 14 x 14 x 16
#   conv 3x3, stride 1, "same" padding -> 14 x 14 x 32
#   flatten + fully connected layer    -> 5 class scores
model = nn.Sequential(
    nn.Conv2d(
        in_channels=3,
        out_channels=16,
        kernel_size=5,
        stride=2),
    nn.ReLU(),
    nn.Conv2d(
        in_channels=16,
        out_channels=32,
        kernel_size=3,
        stride=1,
        padding="same",
        padding_mode="zeros"),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(
        in_features=14 * 14 * 32,
        out_features=5),
    # Normalise over the class dimension explicitly; leaving `dim` unset
    # relies on the deprecated implicit choice and emits a warning.
    nn.Softmax(dim=1),
)

# Count every parameter that takes part in training (weights and biases).
sum_of_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"this model has {sum_of_trainable_params} parameters")

# Transfer Learning

Using pre-trained models for new tasks to increase training speed.
We will use a model trained on ImageNet to classify images into classes that don't exist in ImageNet.

In [None]:
!unzip alienvspredator.zip

In [None]:
# Preview the first six (alphabetically sorted) alien training images in a 2x3 grid.
fig = plt.figure()
alien_root = "alienvspredator/train/alien/"
# List and sort the folder once instead of on every loop iteration; slicing
# also copes with folders that hold fewer than six files.
alien_files = sorted(os.listdir(alien_root))[:6]
for i, file_name in enumerate(alien_files):
    plt.subplot(2, 3, i + 1)
    plt.imshow(Image.open(alien_root + file_name))

# Adjust the spacing once, after all subplots exist.
plt.tight_layout()
plt.show()

In [None]:
# Preview the first six (alphabetically sorted) predator training images in a 2x3 grid.
fig = plt.figure()
predator_root = "alienvspredator/train/predator/"
# List and sort the folder once instead of on every loop iteration; slicing
# also copes with folders that hold fewer than six files.
predator_files = sorted(os.listdir(predator_root))[:6]
for i, file_name in enumerate(predator_files):
    plt.subplot(2, 3, i + 1)
    plt.imshow(Image.open(predator_root + file_name))

# Adjust the spacing once, after all subplots exist.
plt.tight_layout()
plt.show()

Create the PyTorch datasets and data loaders.
We normalize our images with the ImageNet mean and standard deviation and center-crop them to 224x224.
We use a batch size of 32.

In [None]:
# Per-channel mean / std used to normalise the image tensors.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Both splits get the same preprocessing: centre crop to 224x224, convert to
# a tensor, then normalise. Each split receives its own Compose object.
data_transforms = {
    split: transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    for split in ('train', 'validation')
}

# One ImageFolder dataset per split, read from the split's root folder.
image_datasets = {
    split: datasets.ImageFolder(root, data_transforms[split])
    for split, root in (
        ('train', 'alienvspredator/train'),
        ('validation', 'alienvspredator/validation'),
    )
}

# Batches of 32 with one worker each; only the training split is shuffled.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=32,
        shuffle=shuffle,
        num_workers=1,
    )
    for split, shuffle in (('train', True), ('validation', False))
}

Load Pretrained Model

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("RUNNING ON ", device)  # expected to report cuda on a GPU runtime

def get_model_and_optimizer(pretrained=True):
    """Create an EfficientNet-B0 with a new two-class head and an Adam optimizer.

    Args:
        pretrained: forwarded to ``models.efficientnet_b0``. When true, all
            parameters of the loaded network are frozen and only the new
            classifier head is handed to the optimizer; when false, every
            parameter of the network is optimised.

    Returns:
        A ``(model, optimizer)`` tuple; the model lives on ``device``.
    """
    network = models.efficientnet_b0(pretrained=pretrained).to(device)

    # Keep the loaded weights fixed so that training cannot change them.
    if pretrained:
        for weight in network.parameters():
            weight.requires_grad = False

    # Swap in a new classifier head: 1280 features -> 128 hidden -> 2 classes.
    head = nn.Sequential(
        nn.Linear(1280, 128),
        nn.ReLU(inplace=True),
        nn.Linear(128, 2),
    )
    network.classifier = head.to(device)

    # Adam only receives the parameters that are meant to be trained.
    trainable = network.classifier.parameters() if pretrained else network.parameters()
    return network, torch.optim.Adam(trainable)


In [None]:
def train_model(model, optimizer, num_epochs=3):
    """Train ``model`` on the ``'train'`` data loader and return it.

    Uses cross-entropy loss and prints the average loss after every epoch.
    Reads the notebook-level ``dataloaders``, ``image_datasets`` and
    ``device`` objects.

    Args:
        model: network to train; it is switched to training mode.
        optimizer: optimizer that updates the parameters to be trained.
        num_epochs: number of passes over the training data.

    Returns:
        The same ``model`` object after training.
    """
    criterion = nn.CrossEntropyLoss()
    model.train()
    # The dataset size does not change between epochs, so look it up once.
    num_samples = len(image_datasets["train"])

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        running_loss = 0.0

        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # The criterion returns the batch mean; weight it by the batch
            # size so a smaller final batch does not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / num_samples

        print(f"epoch loss is = {epoch_loss}")
    return model

# Transfer-learning run: pretrained network, five epochs of training.
tl_model, optimizer = get_model_and_optimizer(pretrained=True)
tl_model_trained = train_model(model=tl_model, optimizer=optimizer, num_epochs=5)

In [None]:
def validation(model):
    """Plot the validation images the model rates most alien- and predator-like.

    Loads every RGB image from the alien and predator validation folders,
    runs them through ``model`` as a single batch and shows, in a 2x5 grid,
    the five images with the highest probability for output 0 ("Alien", top
    row) and for output 1 ("Predator", bottom row).

    Args:
        model: classifier producing two scores per image. It is evaluated in
            eval mode and put back into its previous mode afterwards.
    """
    a_valid = "alienvspredator/validation/alien/"
    p_valid = "alienvspredator/validation/predator/"
    validation_img_paths = [a_valid + x for x in sorted(os.listdir(a_valid))]
    validation_img_paths += [p_valid + x for x in sorted(os.listdir(p_valid))]

    # Only RGB images are used; anything else is skipped and closed again.
    img_list = []
    for img_path in validation_img_paths:
        img = Image.open(img_path)
        if img.mode == "RGB":
            img_list.append(img)
        else:
            img.close()

    # Inference must run in eval mode: in training mode, layers such as batch
    # normalisation behave differently and update their running statistics.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            validation_batch = torch.stack([data_transforms['validation'](img).to(device)
                                            for img in img_list])
            pred_logits_tensor = model(validation_batch)
            pred_probs = nn.Softmax(dim=1)(pred_logits_tensor).detach().cpu().numpy()
    finally:
        # Leave the model in the mode the caller handed it over in.
        model.train(was_training)

    # Indices of the five highest-probability images per class
    # (ascending sort of the negated probabilities).
    top_per_class = [
        (-pred_probs[:, 0]).argsort()[:5],
        (-pred_probs[:, 1]).argsort()[:5],
    ]

    fig, axs = plt.subplots(2, 5, figsize=(20, 5))

    for row, top_indices in enumerate(top_per_class):
        for col, i in enumerate(top_indices):
            ax = axs[row, col]
            ax.axis('off')
            ax.set_title("{:.0f}% Alien, {:.0f}% Predator".format(100*pred_probs[i,0],
                                                                100*pred_probs[i,1]))
            ax.imshow(img_list[i])

# Plot the most confident alien / predator predictions of the transfer-learning model.
validation(tl_model_trained)

Training should be much slower and worse if we don't use pretrained weights. Let's try it with default Kaiming initialization.

In [None]:
# Baseline run: same architecture without pretrained weights, so the whole
# network is trained for five epochs and then visualised the same way.
default_model, optimizer = get_model_and_optimizer(pretrained=False)
default_model_trained = train_model(model=default_model, optimizer=optimizer, num_epochs=5)
validation(model=default_model_trained)