# 4. Convolutional networks

- Used as part of INFO8010 Deep Learning (Gilles Louppe, 2018-2019).
- Originally adapted from [PyTorch Tutorial for Deep Learning Researchers](https://github.com/yunjey/pytorch-tutorial) (Yunjey Choi, 2018).

---

In [None]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

# Hyper-parameters

In [None]:
# Output size of the final linear layer of the CNN.
num_classes = 10
# Number of full passes over the training loader.
num_epochs = 5
# Mini-batch size used by both the training and the test loader.
batch_size = 100
# Step size handed to the SGD optimizer.
learning_rate = 0.001

# Data

In [None]:
# MNIST splits (images and labels); each image is converted to a tensor on access.
mnist_root = './data'

train_dataset = dsets.MNIST(
    root=mnist_root, train=True, transform=transforms.ToTensor(), download=True)
test_dataset = dsets.MNIST(
    root=mnist_root, train=False, transform=transforms.ToTensor())

# Mini-batch iterators: the training split is shuffled, the test split is not.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Model

In [None]:
# Model
class CNN(nn.Module):
    """Two convolutional stages followed by a linear classifier.

    Each stage is Conv2d(5x5, padding 2) -> BatchNorm2d -> ReLU -> MaxPool2d(2).
    The convolutions keep the spatial size and each pooling halves it, so the
    7*7*32 input size of the classifier corresponds to e.g. 28x28 inputs with
    a single channel. The classifier emits `num_classes` raw scores.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = self._conv_stage(1, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv -> BatchNorm -> ReLU -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        """Return the raw class scores, one row per input sample."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension for the linear layer.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)


model = CNN()

# Loss and optimizer

In [None]:
# Loss and Optimizer
# Softmax is internally computed by CrossEntropyLoss, so the model outputs raw scores.
criterion = nn.CrossEntropyLoss()
# Set parameters to be updated: plain SGD over all of the model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training the Model
# Explicitly select training mode so BatchNorm uses batch statistics even when
# this cell is re-run after the evaluation cell has called model.eval().
model.train()
# len(train_loader) is the real number of steps per epoch, including a
# possibly smaller last batch.
steps_per_epoch = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            # .item() turns the scalar loss tensor into a plain Python float.
            print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, i+1, steps_per_epoch, loss.item()))

# Test the model

In [None]:
# Test the Model
model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0

# Evaluation needs no gradients; disabling autograd avoids building a graph
# for every test batch.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score along the class dimension.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int rather than a tensor.
        correct += (predicted == labels).sum().item()

print('Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

<div class="alert alert-success">
<b>EXERCISE</b>:

Write two convolutional networks, designed as:
<ul>
    <li>INPUT→[CONV→RELU→POOL]x2→FC→RELU→FC</li>
    <li>INPUT→[[CONV→RELU]x2→POOL]x3→[FC→RELU]x2→FC</li>
</ul>

</div>

In [None]:
# Your code

<div class="alert alert-success">
<b>EXERCISE</b>:

Count the number of parameters of these networks

</div>

In [None]:
# Your code

<div class="alert alert-success">
<b>EXERCISE</b>:

Which one works best on MNIST?

</div>

In [None]:
# Your code

<div class="alert alert-success">
<b>EXERCISE</b>:

Evaluate the effect of the number of convolutional kernels on the final model performance.

</div>

In [None]:
# Your code

<div class="alert alert-success">
<b>EXERCISE</b>:

Train a convolutional neural network on <code>dsets.CIFAR10</code>.

</div>

In [None]:
# Your code

# Using a pretrained model

In [None]:
# Download and load pretrained vgg16.
import torchvision

We load VGG16, a CNN trained on ImageNet.
![VGG16:](http://zike.io/upload_img/cnn/vgg-16-receptive-field.png)

In [None]:
# Build VGG16 with pretrained weights and switch it to evaluation mode.
vgg16 = torchvision.models.vgg16(pretrained=True).eval()

## Simple "real life" example with a pre-trained neural network.

We download a mapping from the integer class labels to their text names.

In [None]:
!wget https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json
import json

# Parse the downloaded class-index file; the `with` block closes the file
# handle as soon as the JSON has been read.
with open("imagenet_class_index.json") as fp:
    class_idx = json.load(fp)
# idx2label[k] is element 1 of the entry stored under the string key str(k).
idx2label = np.array([class_idx[str(k)][1] for k in range(len(class_idx))])

VGG16 was trained on the ImageNet dataset, whose inputs were normalized as follows.

In [None]:
# Convert the image to a tensor, then normalize its three channels with the
# per-channel mean and standard deviation below.
channel_mean = (0.485, 0.456, 0.406)
channel_std = (0.229, 0.224, 0.225)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

In [None]:
from PIL import Image
im = Image.open(#LIVE)
im = transform(im)
print(im.shape)
probas = nn.Softmax()(vgg16(im.unsqueeze(0))).view(-1)
sorted_ouputs = torch.argsort(probas, descending=True)
for i in range(5):
    print("Class label: {:s} - Probability: {:4f}".format(
        idx2label[sorted_ouputs[i]], probas[sorted_ouputs[i]]))

## Fine-tuning of pre-trained neural network

In [None]:
# If you want to finetune only top layer of the model.
for param in vgg16.parameters():
    param.requires_grad = True
    
# Replace top layer for finetuning, e.g. for CIFAR10.
vgg16.classifier = nn.Linear(?, ?)  

In [None]:
def count_parameters(model):
    """Return the total number of elements in the trainable parameters of `model`.

    Only parameters whose `requires_grad` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

print(count_parameters(vgg16))

<div class="alert alert-success">
<b>EXERCISE</b>:

Investigate the output that is given by the network.
Can it be interpreted as probabilities? If not, transform it the way it is usually done in deep learning, with the [softmax function](https://en.wikipedia.org/wiki/Softmax_function).

</div>

In [None]:
# For test.
# Random input of shape (10, 3, 224, 224), presumably
# (batch, channels, height, width), just to exercise the forward pass.
images = torch.randn(10, 3, 224, 224)
outputs = vgg16(images)
print(outputs)  
# Your code

<div class="alert alert-success">
<b>EXERCISE</b>:

Retrain the new head on CIFAR10.

</div>

In [None]:
# Your code

## Filter Visualization

<div class="alert alert-success">
<b>EXERCISE</b>:

Now that you know how to load a pretrained convolutional neural network such as VGG16, you may want to visualize the features extracted by these pre-trained networks. 
Take some time to understand and play with the following code.
</div>

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


class SaveFeatures():
    """Record the latest output of `module` by means of a forward hook."""

    def __init__(self, module):
        # Keep the handle so that close() can unregister the hook later.
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Store the module output, flagged as requiring grad, on `device`."""
        tracked = output.requires_grad_(True)
        self.features = tracked.to(device)

    def close(self):
        """Unregister the forward hook."""
        self.hook.remove()

In [None]:
def deprocess_image(x):
    """Turn an array of arbitrary values into a displayable uint8 image.

    The values are standardized to zero mean and a standard deviation of 0.1,
    shifted so they are centered on 0.5, clipped to [0, 1] and scaled to
    [0, 255].

    Args:
        x: array-like of numbers. It is left unmodified.

    Returns:
        A new `uint8` array with the same shape as `x`.
    """
    # Work on a private floating-point copy: the in-place arithmetic below
    # would otherwise overwrite the caller's array and fail on integer input.
    x = np.array(x, copy=True)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(np.float64)

    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)  # epsilon guards against a constant image (std == 0)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array; values already lie in [0, 255] after the clip above
    x *= 255
    return x.astype('uint8')

In [None]:
class FilterVisualizer():
    """Synthesize an image that strongly activates one filter of `vgg16.features`.

    Starting from random noise, the pixel values are optimized by gradient
    ascent on the mean activation of the chosen filter.
    """

    def __init__(self, size=100):
        """Store the side length (in pixels) of the square image to optimize."""
        self.size = size
        # NOTE(review): train() is kept from the original code; presumably it
        # makes no difference for these layers -- confirm before using a
        # network that contains dropout or batch-norm.
        self.model = vgg16.features.train().to(device)

    def visualize(self, layer, filter, lr=1., opt_steps=10):
        """Optimize a random image for one filter and save the result.

        Args:
            layer: index into `list(self.model.children())` of the module to hook.
            filter: index of the channel (second dimension of the hooked
                output) whose mean activation is maximized.
            lr: learning rate of the SGD optimizer acting on the pixels.
            opt_steps: number of optimization steps.

        The final image is stored in `self.output` as a uint8 array with the
        channel dimension last and is written to disk by `save`.
        """
        sz = self.size
        # generate random image with values in [128, 148)
        noise = np.random.random((1, 3, sz, sz)) * 20 + 128.
        # Leaf tensor holding the pixels that the optimizer updates. Built
        # straight from the numpy array, which avoids the copy-construct
        # warning raised by torch.tensor(<tensor>).
        img_var = torch.from_numpy(noise).float().to(device).requires_grad_(True)
        optimizer = torch.optim.SGD([img_var], lr=lr)

        activations = SaveFeatures(list(self.model.children())[layer])  # register hook
        try:
            for n in range(opt_steps):  # optimize pixel values for opt_steps times
                optimizer.zero_grad()
                self.model(img_var)
                # Minimizing the negative mean activation maximizes the activation.
                loss = -activations.features[0, filter].mean()
                loss.backward()
                # We renormalize the gradient by its RMS to avoid too small/big
                # steps; the epsilon prevents 0/0 (NaN pixels) when the filter
                # yields an all-zero gradient.
                grad = img_var.grad
                img_var.grad = grad / ((grad ** 2).mean().sqrt() + 1e-5)
                optimizer.step()
                if n % 20 == 0:
                    print(n)
        finally:
            # Always unregister the hook, even if the optimization fails, so it
            # does not stay attached to the shared model.
            activations.close()

        # Drop the batch dimension and move channels last for plotting.
        self.output = img_var[0].permute(1, 2, 0).detach().cpu().numpy()
        print(self.output.shape)
        self.output = deprocess_image(self.output)
        self.save(layer, filter)

    def save(self, layer, filter):
        """Write `self.output` to 'layer_<layer>_filter_<filter>.jpg'."""
        plt.imsave("layer_"+str(layer)+"_filter_"+str(filter)+".jpg", self.output)

In [None]:
import matplotlib.image as mpimg

layer = 1
filter = 9

# Optimize a 100x100 image for the chosen layer/filter; visualize() also
# writes the result to disk.
FV = FilterVisualizer(size=100)
FV.visualize(layer, filter, lr=1., opt_steps=100)

# Read the saved file back and display it.
image_path = "layer_{}_filter_{}.jpg".format(layer, filter)
img = mpimg.imread(image_path)
plt.figure(figsize=(10, 10))
plt.grid(None)
plt.imshow(img)