In [None]:
!pip install torchvision opencv-python colorama

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import torch
import cv2
import time
import colorama

import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['figure.figsize'] = [20, 10]

import torch
import torchvision
import torchvision.transforms as transforms

### Loading and normalizing CIFAR10

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 16

# Training split (downloaded to ./data on first use), reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
num_classes = len(classes)

In [None]:
# functions to show an image

def imshow(img):
    """Display a normalized image tensor with matplotlib.

    The tensor is rescaled with ``img / 2 + 0.5`` (undoing the 0.5/0.5
    normalization), converted to a NumPy array, and its first axis is moved
    last before it is handed to ``plt.imshow``.
    """
    unnormalized = img / 2 + 0.5  # undo the normalization
    pixels = unnormalized.numpy()
    # axes (0, 1, 2) -> (1, 2, 0): channels-first to channels-last
    plt.imshow(pixels.transpose((1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(train_loader)
# Use the built-in next(): DataLoader iterators implement the iterator
# protocol (__next__); current PyTorch releases have no `.next()` method.
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels; iterate over the labels actually returned so that a batch
# shorter than batch_size cannot raise an IndexError
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

<https://pytorch.org/docs/stable/nn.html#conv2d>

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN for 3-channel 32x32 images.

    Two conv -> ReLU -> max-pool stages are followed by three fully
    connected layers. ``forward`` returns the raw output of the last linear
    layer (unnormalized class scores); no softmax is applied.

    Args:
        num_classes: number of scores produced per image (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Spatial size for a 32x32 input:
        # 32 -conv5-> 28 -pool-> 14 -conv5-> 10 -pool-> 5, with 16 channels.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (N, 3, 32, 32)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds extra features into the batch
        # dimension: an input of the wrong spatial size fails in fc1 instead of
        # silently producing a different batch size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

### Print a network summary

In [None]:
# A bare expression on the last line of a cell is displayed by the notebook;
# for an nn.Module the repr lists its registered sub-modules.
net

### Retrieve trainable parameters

In [None]:
# Materialize the (name, tensor) pairs of every parameter registered on the network.
named_params = [*net.named_parameters()]
print("len(params): %s\n" % len(named_params))
for name, param in named_params:
    # one line per parameter: its name followed by the size of its tensor
    print("%s:\t%s" % (name, param.size()))

### Feed network with a random input

In [None]:
input = torch.randn(1, 3, 32, 32)  # batch_size, num_channels, height, width
out = net(input)
# The network ends in a plain Linear layer, so `out` holds unnormalized scores
# (logits), not log-probabilities. exp() of logits is not a distribution;
# softmax over the class dimension yields probabilities that sum to 1.
print("Logits: \n%s\n" % out)
print("Probabilities: \n%s\n" % torch.softmax(out, dim=1))
print("out.shape: \n%s" % (out.shape,))

### How can we now actually train our CNN?

In [None]:
def train_cnn(model, train_loader, test_loader, device, num_epochs=2, lr=0.1,
              log_interval=315):
    """Train ``model`` with Adam and cross-entropy loss, printing progress.

    During each epoch the current batch loss is printed every
    ``log_interval`` batches. After each epoch the accuracy on both loaders
    is computed with ``accuracy`` and printed.

    Args:
        model: network to optimize. Only the batches are moved to ``device``
            here, so the model must already live on that device.
        train_loader: iterable of ``(data, labels)`` batches used for the
            optimization steps; must support ``len()`` when logging is on.
        test_loader: iterable of ``(data, labels)`` batches used only for the
            per-epoch accuracy report.
        device: device the batches are moved to.
        num_epochs: number of passes over ``train_loader``.
        lr: learning rate handed to Adam.
        log_interval: print the loss every this many batches (default 315,
            the previously hard-coded value). ``0`` or ``None`` disables the
            per-batch logging.

    Returns:
        None. All results are reported through ``print``.
    """
    # define an optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        print("="*40, "Starting epoch %d" % (epoch + 1), "="*40)

        # accuracy() switches the model to eval mode at the end of every
        # epoch, so training mode has to be restored here
        model.train()

        # dataloader returns batches of images for 'data' and a tensor with their respective labels in 'labels'
        for batch_idx, (data, labels) in enumerate(train_loader):
            data, labels = data.to(device), labels.to(device)

            optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            # a falsy interval (0 / None) turns the per-batch logging off
            if log_interval and batch_idx % log_interval == 0:
                print("Batch %d/%d, Loss=%.4f" % (batch_idx, len(train_loader), loss.item()))

        train_acc = accuracy(model, train_loader, device)
        test_acc = accuracy(model, test_loader, device)
        print(colorama.Fore.GREEN, "\nAccuracy on training: %.2f%%" % (100*train_acc))
        print("Accuracy on test: %.2f%%" % (100*test_acc), colorama.Fore.RESET)

### Evaluate model's accuracy on train/test data

In [None]:
def accuracy(model, dataloader, device):
    """Return the fraction of samples in ``dataloader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and is left in that mode when
    the function returns. Every batch is moved to ``device`` before the
    forward pass; the predicted class is the index of the largest score
    along dimension 1 of the model output.
    """
    model.eval()

    correct, total = 0, 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch, targets in dataloader:
            batch = batch.to(device)
            targets = targets.to(device)

            # torch.max along dim 1 returns (values, indices); the indices
            # are the predicted classes
            _, predicted = torch.max(model(batch), 1)
            correct += (predicted == targets).sum().item()
            total += predicted.shape[0]

    return correct / total

### Start the training!

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# train_cnn only moves the batches to `device`, so the model is moved here.
net = net.to(device)

# Train with train_cnn's default number of epochs and a learning rate of 2e-3.
train_cnn(net, train_loader, test_loader, device, lr=2e-3)

### Let's look at some of the model's predictions

In [None]:
def visualize_predictions(model, dataloader, device, num_images=10):
    """Plot images from the first batch, each captioned with the predicted class.

    Args:
        model: classifier whose output has the class scores along dimension 1.
            It is switched to evaluation mode and left in that mode.
        dataloader: iterable of ``(data, labels)`` batches; only the first
            batch is used and its labels are ignored.
        device: device on which the forward pass is run.
        num_images: maximum number of images to show (default 10). Fewer are
            shown when the batch is smaller.
    """
    data, _ = next(iter(dataloader))
    data = data[:num_images].to(device)

    # Evaluation mode makes the predictions independent of whatever mode the
    # model was left in; no_grad avoids building an autograd graph.
    model.eval()
    with torch.no_grad():
        predictions = model(data).max(1)[1]

    predictions, data = predictions.cpu(), data.cpu()

    # zip() iterates over the images actually available, so a batch with
    # fewer than num_images samples does not raise an IndexError
    for img, prediction in zip(data, predictions):
        img = img / 2 + 0.5     # unnormalize
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

        plt.xlabel(classes[prediction.item()], fontsize=18)
        plt.xticks([])
        plt.yticks([])
        plt.show()
visualize_predictions(net, test_loader, device)

# Exercise: Create your own version of VGG11

VGG has 2 different parts: backbone and classifier

An image of the architecture:
https://i.imgur.com/uLXrKxe.jpg

Some useful documentation:
* https://pytorch.org/docs/stable/nn.html#torch.nn.Sequential
* https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d
* https://pytorch.org/docs/stable/nn.html#torch.nn.ReLU
* https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout
  


In [None]:
# VGG11 model from torchvision, instantiated only as a reference: displaying
# it shows the layer layout to reproduce in the exercise.

import torchvision.models as models
net_to_implement = models.vgg11()
net_to_implement

# NOTE: the images used in this notebook have 3 channels (the transform
# normalizes three channels and the network above takes 3x32x32 inputs), so
# the first conv layer must keep 3 input channels, not 1. What has to be
# adapted is the last classifier layer, which must output num_classes scores.

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class VGG11(nn.Module):
    """Chains a feature extractor, a 7x7 adaptive average pool and a classifier.

    Args:
        backbone: module applied first to the input batch.
        classifier: module applied to the flattened, pooled features.
    """

    def __init__(self, backbone, classifier):
        super().__init__()
        self.backbone = backbone
        # pools any spatial size to 7x7, so the classifier input size is fixed
        self.adaptativeAveragePool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = classifier

    def forward(self, x):
        """Run backbone -> pool -> flatten -> classifier and return the result."""
        pooled = self.adaptativeAveragePool(self.backbone(x))
        # keep the batch dimension and merge all remaining dimensions
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

In [None]:
def get_backbone():
    """Exercise stub: build and return the convolutional part of VGG11.

    The returned module is used as ``VGG11.backbone``: it receives the image
    batch, and its output is passed to a 7x7 adaptive average pool before
    being flattened for the classifier. Use the layer layout printed for the
    torchvision reference model as a guide.

    Raises:
        NotImplementedError: until the exercise is solved. Failing here is
            deliberate: silently returning ``None`` would only surface later
            as a harder-to-read error.
    """
    raise NotImplementedError(
        "get_backbone() is an exercise stub: return an nn.Module "
        "(e.g. nn.Sequential) containing the convolutional layers of VGG11"
    )

In [None]:
def get_classifier(num_classes):
    """Exercise stub: build and return the fully connected head of VGG11.

    The returned module is used as ``VGG11.classifier``: it receives the
    backbone features after 7x7 adaptive average pooling, flattened to shape
    ``(batch, channels * 7 * 7)``.

    Args:
        num_classes: number of scores the last layer has to output.

    Raises:
        NotImplementedError: until the exercise is solved. Failing here is
            deliberate: silently returning ``None`` would only surface later
            as a harder-to-read error.
    """
    raise NotImplementedError(
        "get_classifier() is an exercise stub: return an nn.Module "
        "(e.g. nn.Sequential) whose last layer outputs num_classes scores"
    )

In [None]:
# instantiate and print your Net
# Move the new model to `device`: train_cnn moves every batch there, so a
# model left on the CPU fails with a device mismatch when a GPU is selected.
net = VGG11(get_backbone(), get_classifier(num_classes)).to(device)
net

In [None]:
# Train the model
num_epochs = 1 # start small to see if your model works, initially

# call train_cnn: it prints the loss periodically during each epoch and the
# train/test accuracy after each epoch (it does not return or store the losses)
train_cnn(net, train_loader, test_loader, device, num_epochs=num_epochs, lr=2e-3)