In [None]:
import ast
import io
import json
import sys
import urllib
import urllib.request

import cv2
import ipywidgets as ipy
import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
import torchvision
from PIL import Image
from skimage.transform import resize
from torch.autograd import Variable, Function
from torchvision import models, transforms, utils

In [None]:
def preprocess_image(img):
    """Normalize an image array and wrap it as a gradient-tracking batch of one.

    The array is normalized along its last axis with fixed per-channel means
    and standard deviations, given a leading batch axis, and reordered from
    (batch, height, width, channel) to (batch, channel, height, width).

    Args:
        img: numpy array with the three color channels on the last axis.

    Returns:
        A float32 Variable of shape (1, 3, H, W) with ``requires_grad=True``.
    """
    channel_means = np.array([0.485, 0.456, 0.406])
    channel_stds = np.array([0.229, 0.224, 0.225])

    normalized = (img - channel_means) / channel_stds
    batch = normalized[np.newaxis, ...]
    batch_tensor = torch.from_numpy(batch).permute(0, 3, 1, 2).type(torch.FloatTensor)

    # Gradients w.r.t. the input are needed so hooks can be registered downstream.
    return Variable(batch_tensor, requires_grad=True)

def deprocess_image(image_tensor):
    """Undo the per-channel normalization and return a displayable image.

    Args:
        image_tensor: tensor laid out as (channel, height, width).

    Returns:
        A float32 numpy array of shape (height, width, channel) with values
        clipped to the range [0, 1].
    """
    channel_means = np.array([0.485, 0.456, 0.406])
    channel_stds = np.array([0.229, 0.224, 0.225])

    # Move the channel axis last so the per-channel constants broadcast.
    channels_last = image_tensor.numpy().transpose(1, 2, 0)
    restored = channels_last * channel_stds + channel_means
    return np.clip(restored, 0.0, 1.0).astype(np.float32)

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
class IgnoreNeptune(object):
    """No-op stand-in for the Neptune context used by ``training()``.

    Both methods accept any positional arguments and discard them, so the
    training code can call ``ctx.channel_send`` / ``ctx.channel_reset``
    without a Neptune backend.
    """

    def channel_send(self, *args):
        """Accept and discard a value that would be sent to a channel."""
        pass

    def channel_reset(self, *args):
        """Accept and discard a channel reset request."""
        pass


# Fall back to the no-op logger only when no context named `ctx` exists yet,
# so the notebook survives Restart & Run All without Neptune while an
# already-created Neptune context is left untouched.
try:
    ctx
except NameError:
    ctx = IgnoreNeptune()

## ImageNet labels

[ImageNet](http://image-net.org/) is a large database with images pertaining to 1 of 1000 classes. We load the class names into a dictionary in the cell below.

In [None]:
LABELS_URL = 'https://gist.githubusercontent.com/yrevar/942d3a0ac09ec9e5eb3a/raw/c2c91c8e767d04621020c30ed31192724b863041/imagenet1000_clsid_to_human.txt'
labels_response = requests.get(LABELS_URL)
# Fail loudly on an HTTP error instead of trying to parse an error page.
labels_response.raise_for_status()
# The file is a Python dict literal; literal_eval parses literals only, so
# downloaded text can never execute arbitrary code (unlike eval).
labels = ast.literal_eval(labels_response.text)

## Download image from URL

The function below retrieves a file by URL and tries to read it as an instance of `PIL.Image` class.

In [None]:
def download_and_read_image(url):
    """Download and read image as an instance of PIL.Image class.

    Args:
        url: address of the image file to fetch.

    Returns:
        The image opened by ``Image.open`` from an in-memory copy of the
        downloaded bytes.
    """
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    # The bytes are held in memory, so the image stays readable after the
    # connection is closed.
    return Image.open(io.BytesIO(payload))

In [None]:
# Download the example image from a fixed URL.
image = download_and_read_image(
    'https://i.ytimg.com/vi/UXOt4LRLajY/hqdefault.jpg'
)

In [None]:
# Show the downloaded image as-is, before any preprocessing.
plt.figure(figsize=(10, 10))
plt.imshow(image)

In [None]:
# Preprocessing for the pretrained network: resize, crop to 224x224, convert
# to a tensor, and normalize with the same per-channel constants that
# deprocess_image() undoes.
transform = transforms.Compose([
    transforms.Resize(256),
    # A center crop keeps inference deterministic: re-running the notebook
    # yields the same input, prediction, and heatmap (RandomCrop did not).
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
                          std = [ 0.229, 0.224, 0.225 ]),
])

In [None]:
# Inspect the dimensions of the downloaded image.
image.size

In [None]:
# Apply the preprocessing pipeline defined above to the downloaded image.
image_processed = transform(image)

In [None]:
# Undo the normalization so the preprocessed image can be displayed.
plt.imshow(deprocess_image(image_processed))

## Load a pretrained network from PyTorch library

Here we will use a pretrained [VGG](https://arxiv.org/abs/1409.1556) network to generate a label for a given image. You can choose this model (among others) from the PyTorch [model library](http://pytorch.org/docs/master/torchvision/models.html).

First, we load the model and its pretrained weights. We also set it in evaluation state by calling `eval()` function on it.

In [None]:
# Build the VGG-19 (batch-norm) architecture, then fill it with weights read
# from a local checkpoint file.
net = models.vgg19_bn()
net.load_state_dict(torch.load('/public/models/pytorch/vgg/vgg19_bn-c79401a0.pth'))
# Switch to evaluation mode before running inference.
net.eval()

In [None]:
# unsqueeze(0) adds a leading batch axis so the single image is passed as a batch of one.
activations = net(Variable(image_processed.unsqueeze(0)))

In [None]:
# Inspect the raw network outputs.
activations

In [None]:
# torch.max along dim 1 returns both the largest output and its index.
torch.max(activations, 1)

In [None]:
# Keep only the index of the largest output; the value itself is discarded.
_, i = torch.max(activations, 1)

In [None]:
# Extract the single predicted class index as a plain Python int. Indexing
# the element avoids int() on a 1-element array, which NumPy deprecates.
i = int(i.data[0])

In [None]:
# Show the preprocessed image, titled with the predicted class index and its
# upper-cased label.
plt.figure(figsize=(10, 10))
plt.imshow(deprocess_image(image_processed))
plt.title("{}: {}".format(i, labels[i].upper()))

## Exercise
* Get the five most likely labels for the given image

## But why did you say that?

Because neural networks are often considered black-box models, there has been considerable effort to increase their transparency. One example is the [Grad-CAM](https://arxiv.org/pdf/1610.02391v1.pdf) method for generating class activation heatmaps.

In [None]:
# Run Grad-CAM on the GPU whenever one is available.
USE_CUDA = torch.cuda.is_available()
# Name of the module inside `net.features` whose output and gradient feed the
# heatmap (passed to GradCam as the only target layer).
# NOTE(review): presumably the last layer of the VGG feature stack - confirm.
CONVO_EXTRACTOR_NR = '52'

### Building calculation graph
* Building feature extractor
* Building gradient and activation extractor

In [None]:
class FeatureExtractor():
    """Runs a sequential model layer by layer, capturing the outputs of the
    targeted layers and registering hooks that collect their gradients."""

    def __init__(self, model, target_layers):
        model.eval()
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Hook callback: store a gradient delivered during backward."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Forward ``x`` through every child module in order.

        Returns:
            (captured, x): the list of targeted layer outputs and the output
            of the last module.
        """
        # Gradients from a previous pass must not leak into this one.
        self.gradients = []
        captured = []
        for layer_name, layer in self.model._modules.items():
            x = layer(x)
            if layer_name not in self.target_layers:
                continue
            x.register_hook(self.save_gradient)
            captured.append(x)
        return captured, x
    
class ModelOutputs():
    """Makes a forward pass and exposes:
    1. The network output.
    2. Activations from the targeted intermediate layers.
    3. Gradients from the targeted intermediate layers."""

    def __init__(self, model, target_layers):
        model.eval()
        self.model = model
        # Only the `features` sub-network is walked layer by layer.
        self.feature_extractor = FeatureExtractor(model.features, target_layers)

    def get_gradients(self):
        """Return the gradients collected by the feature extractor's hooks."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Return (target_activations, classifier_output) for input ``x``."""
        target_activations, conv_output = self.feature_extractor(x)
        # Flatten everything but the batch axis before the classifier.
        flattened = conv_output.view(conv_output.size(0), -1)
        return target_activations, self.model.classifier(flattened)

### Extracting heatmap
* Building one hot objective function for chosen class 
* calculating grads and output activations for a given model and image 
* building heatmap

In [None]:
def show_cam_on_image(img, mask):
    """Blend a JET-colored version of ``mask`` onto ``img``.

    Args:
        img: image array that the colored mask is added to.
        mask: array scaled by 255 and cast to uint8 before color mapping.

    Returns:
        The sum of the colored mask (scaled to [0, 1]) and the image, divided
        by its own maximum.
    """
    colored = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    overlay = np.float32(colored) / 255 + np.float32(img)
    return overlay / np.max(overlay)


class GradCam:
    """Grad-CAM heatmap generator for a model with `features` and `classifier` parts.

    Args:
        model: network to explain; it is put in evaluation mode and, when
            ``use_cuda`` is true, moved to the GPU.
        target_layer_names: names of the modules in ``model.features`` whose
            activations and gradients are captured.
        use_cuda: whether to run on the GPU.
    """

    def __init__(self, model, target_layer_names, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = ModelOutputs(self.model, target_layer_names)

    def forward(self, input):
        """Plain forward pass through the wrapped model."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Compute the class activation map for ``input``.

        Args:
            input: batch-of-one image tensor that requires grad.
            index: class to explain; defaults to the top-scoring class.

        Returns:
            (cam, index, index_max): the heatmap resized to the input's
            spatial size and scaled to [0, 1], the explained class index, and
            the top-scoring class index.
        """
        if self.cuda:
            input = input.cuda()
        features, output = self.extractor(input)

        index_max = np.argmax(output.cpu().data.numpy())
        if index is None:
            index = index_max

        # Select the score of the requested class via a one-hot mask so that
        # backward() propagates from that single output.
        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][index] = 1
        one_hot = Variable(torch.from_numpy(one_hot), requires_grad=True)
        if self.cuda:
            one_hot = one_hot.cuda()
        class_score = torch.sum(one_hot * output)

        self.model.features.zero_grad()
        self.model.classifier.zero_grad()
        class_score.backward(retain_graph=True)

        grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()
        target = features[-1].cpu().data.numpy()[0, :]

        # Channel weights are the spatially averaged gradients.
        weights = np.mean(grads_val, axis=(2, 3))[0, :]

        # Weighted sum of activation maps. The sum must start from zero: a
        # constant offset before the ReLU would shift its threshold.
        cam = np.tensordot(weights, target, axes=([0], [0])).astype(np.float32)

        cam = np.maximum(cam, 0)
        # cv2.resize expects (width, height); follow the input's spatial size
        # instead of assuming 224x224.
        cam = cv2.resize(cam, (int(input.size(3)), int(input.size(2))))
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            # An all-zero map is left as zeros rather than divided by zero.
            cam = cam / peak
        return cam, index, index_max

### Plotting

In [None]:
def category_search(categories, text=None, index=None, by_text=True):
    """Look up categories either by substring or by index.

    Args:
        categories: mapping from class index to category name.
        text: substring to look for in category names (used when ``by_text``).
        index: key to look up directly (used when ``by_text`` is false).
        by_text: selects between the two lookup modes.

    Returns:
        With ``by_text``: a dict mapping each matching category name to its
        index. Otherwise: ``categories[index]``.
    """
    if not by_text:
        return categories[index]

    matches = {}
    for class_id, category in categories.items():
        if text in category:
            matches[category] = class_id
    return matches
    
def plot_grad_cam(img, model, categories, target_index=None):
    """Plot the image next to its Grad-CAM overlay.

    The left panel shows ``img`` titled with the top-scoring category; the
    right panel shows the heatmap overlay titled with the explained category.

    Args:
        img: image array passed to preprocess_image() and used as the overlay base.
        model: network handed to GradCam.
        categories: mapping from class index to category name.
        target_index: class to explain; forwarded to GradCam unchanged.
    """
    cam_generator = GradCam(model = model, target_layer_names = [CONVO_EXTRACTOR_NR], use_cuda=USE_CUDA)

    mask, target_index, target_max = cam_generator(preprocess_image(img), target_index)
    # The overlay is built from the inverted mask.
    img_cam = show_cam_on_image(img, 1.0 - mask)

    panels = (
        (1, target_max, img),
        (2, target_index, img_cam),
    )
    plt.figure(figsize=(16,10))
    for position, class_index, picture in panels:
        plt.subplot(1, 2, position)
        plt.title((category_search(categories, index=class_index, by_text=False)))
        plt.imshow(picture)
    plt.show()
    
def visualize_heatmaps(net, image_numpy, labels):
    """Interactive Grad-CAM explorer.

    Shows a text box for filtering category names, a dropdown with the
    matching categories, and a toggle button that draws the heatmap for the
    chosen category.

    Args:
        net: network passed to plot_grad_cam().
        image_numpy: image array passed to plot_grad_cam().
        labels: mapping from class index to category name.
    """
    @ipy.interact(text='leop')
    def explore_heatmaps(text):
        options = category_search(labels, text=text)
        if not options:
            # The callback fires on every edit of the text box; a filter with
            # no matches must not crash while building the dropdown.
            print('No category contains {!r}.'.format(text))
            return

        names = list(options.keys())
        dropdown = ipy.Dropdown(
            options=names,
            value=names[0]
        )
        button = ipy.ToggleButton(
            description='Generate Heatmap',
            value=False
            )
        def get_index(chosen, generate):
            if generate:
                # First index whose name equals the chosen entry.
                index = next(k for k, v in labels.items() if v == chosen)
                plot_grad_cam(image_numpy, net, labels, target_index=index)

        display(ipy.interactive(get_index, chosen=dropdown, generate=button))

In [None]:
# Displayable numpy version of the preprocessed image, used as the heatmap base.
image_numpy = deprocess_image(image_processed)

In [None]:
# Launch the interactive heatmap explorer for the pretrained network.
visualize_heatmaps(net, image_numpy, labels)

# Part 2: DIY

In [None]:
# Convert images to tensors and normalize every channel with mean 0.5 and
# std 0.5. This rebinds the `transform` used earlier in the notebook.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

## Data

We will work with [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html): a benchmark dataset for image recognition. There are ten classes of objects on the images with 50K examples in the training set and 10K in the testset.

In [None]:
# Training and test splits read from the same directory; only the training
# call requests a download.
trainset = torchvision.datasets.CIFAR10('/public/cifar/', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10('/public/cifar/', train=False, transform=transform)
# Human-readable class names, indexed by label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

## Defining your own network

In PyTorch, each model needs to inherit from the `nn.Module` class. Thanks to this, we can use PyTorch's powerful set of methods for differentiation (and, in turn, model training).

In [None]:
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages followed by
    three fully connected layers producing 10 outputs."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 channels of 5x5 feature maps remain after the second pooling.
        self.fc_size = 16 * 5 * 5
        self.fc1 = nn.Linear(self.fc_size, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 raw class scores for a batch of 32x32 images."""
        # Height and width of x: 32 -> 28 (conv 5x5) -> 14 (pool)
        x = self.pool(F.relu(self.conv1(x)))
        # 14 -> 10 (conv 5x5) -> 5 (pool)
        x = self.pool(F.relu(self.conv2(x)))
        flat = x.view(-1, self.fc_size)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
def performance_stats(net, dataloader, use_gpu=False):
    """Computes performance statistics - logloss and accuracy.

    The network is switched to evaluation mode for the duration of the pass
    (so dropout and batch-norm do not distort the statistics) and restored to
    its previous mode afterwards. No gradients are recorded.

    Args:
        net: model to evaluate.
        dataloader: iterable of ``(images, labels)`` batches.
        use_gpu: move every batch to the GPU before the forward pass.

    Returns:
        ``(accuracy, mean_logloss, total)`` as Python numbers, where
        ``total`` is the number of evaluated examples.

    Raises:
        ZeroDivisionError: if ``dataloader`` yields no examples.
    """
    correct = 0
    total = 0
    logloss = 0.0
    was_training = net.training
    net.eval()
    try:
        # no_grad replaces the removed `volatile` flag: without it every
        # evaluation batch would build an autograd graph.
        with torch.no_grad():
            for images, labels in dataloader:
                if use_gpu:
                    images, labels = images.cuda(), labels.cuda()
                outputs = net(images)
                # Sum (not mean) per batch so the final division by `total`
                # gives the exact per-example average.
                logloss += F.cross_entropy(outputs, labels, reduction='sum').item()
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
    finally:
        net.train(was_training)
    acc = float(correct) / total
    return acc, logloss / total, total

In [None]:
def highest_loss_img(images, labels, outputs, use_gpu):
    """Determines the image with the highest logloss in a given batch.

    Args:
        images: batch of normalized image tensors.
        labels: true class index for every image in the batch.
        outputs: raw network scores, one row per image.
        use_gpu: kept for interface compatibility; tensors are moved to the
            CPU regardless of where they live.

    Returns:
        ``(pil_img, true_label, true_label_prob, predicted_label,
        predicted_label_prob)`` for the example whose true class received the
        lowest probability.
    """
    # Softmax over the class axis, stated explicitly (the implicit-dim form
    # is deprecated).
    probs = F.softmax(outputs, dim=1)
    # .cpu() is a no-op for CPU tensors, so no device-specific branch is needed.
    np_probs = probs.detach().cpu().numpy()
    np_labels = labels.detach().cpu().numpy()

    # Probability assigned to the true class of every example.
    predicted_prob = np_probs[np.arange(len(np_labels)), np_labels]
    # Lowest true-class probability == highest logloss.
    i = np.argmin(predicted_prob)
    label_pred = np.argmax(np_probs[i])

    img = images[i].detach().cpu() / 2 + 0.5 # Rescale to the original values
    pil_img = transforms.ToPILImage()(img)
    return pil_img, np_labels[i], predicted_prob[i], label_pred, np_probs[i,label_pred]

In [None]:
def training(net, optimizer, criterion, trainloader, testloader, num_epoch, logging_window=1000, use_gpu=True, experiment_name=''):
    """Main function for model training.

    Runs ``num_epoch`` passes over ``trainloader``. Every ``logging_window``
    batches within an epoch the mean running loss is sent to the global
    context ``ctx``, together with the hardest image of the current batch
    when the ``neptune`` module is available. After every epoch, accuracy and
    logloss on both loaders are printed and sent to ``ctx``.

    Args:
        net: model to train.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.
        trainloader: iterable of training ``(images, labels)`` batches.
        testloader: iterable of test ``(images, labels)`` batches.
        num_epoch: number of passes over ``trainloader``.
        logging_window: number of batches between running-loss reports.
        use_gpu: move every batch to the GPU before the forward pass.
        experiment_name: prefix of every channel name.
    """
    # neptune.Image exists only when the notebook imported neptune; without
    # it the image channel is skipped instead of raising NameError.
    neptune_module = globals().get('neptune')

    # Channels need to be reset on another invocation of training() function.
    # At first, a warning will be printed as these channels do not exist.
    for channel in (' training set running loss',
                    ' training set accuracy',
                    ' training set logloss',
                    ' test set accuracy',
                    ' test set logloss',
                    ' image with the highest logloss in a batch'):
        ctx.channel_reset(experiment_name + channel)

    n_iter = 0
    for epoch in range(num_epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            images, labels = data
            if use_gpu:
                images, labels = images.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # .item() extracts the Python number from the 0-dim loss tensor.
            running_loss += loss.item()
            # Count every batch so the x-axis stays the true iteration count
            # even when an epoch is not a multiple of logging_window.
            n_iter += 1
            if i % logging_window == (logging_window - 1):
                ctx.channel_send(experiment_name + ' training set running loss', n_iter, running_loss / logging_window)
                running_loss = 0.0
                if neptune_module is not None:
                    img, label, prob, label_pred, prob_pred = highest_loss_img(images, labels, outputs, use_gpu=use_gpu)
                    ctx.channel_send(experiment_name + ' image with the highest logloss in a batch', neptune_module.Image(
                      name=classes[label],
                      description='True label: %s (p = %.4f)\nPredicted:  %s (p = %.4f)' %
                        (classes[label], prob, classes[label_pred], prob_pred),
                      data=img.resize((128, 128))))

        # Post epoch statistics
        acc_train, logloss_train, total_train = performance_stats(net, trainloader, use_gpu)
        print('------------------------------')
        print(epoch, acc_train, logloss_train, total_train)
        ctx.channel_send(experiment_name + ' training set accuracy', epoch, acc_train)
        ctx.channel_send(experiment_name + ' training set logloss', epoch, logloss_train)

        acc_test, logloss_test, total_test = performance_stats(net, testloader, use_gpu)
        print(epoch, acc_test, logloss_test, total_test)
        ctx.channel_send(experiment_name + ' test set accuracy', epoch, acc_test)
        ctx.channel_send(experiment_name + ' test set logloss', epoch, logloss_test)

## Data

Each model needs some fuel to work. Here, it is images that we feed the network. 

You can display some examples using code below.

In [None]:
# Number of images per batch for the loaders created below.
batch_size = 16

In [None]:
# Training batches are reshuffled; test batches keep the dataset order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
# Batch size times number of batches for the test and training loaders
# (an upper bound on the example count if the last batch is not full).
batch_size * len(testloader), batch_size * len(trainloader)

In [None]:
def imshow(img):
    """Display a (channel, height, width) image tensor normalized with mean 0.5 / std 0.5."""
    unnormalized = img / 2 + 0.5     # Unnormalize
    # matplotlib expects the channel axis last.
    plt.imshow(np.transpose(unnormalized.numpy(), (1, 2, 0)))

In [None]:
dataiter = iter(trainloader)
# next() works with every iterator; the .next() method no longer exists on
# DataLoader iterators. Distinct names keep the global ImageNet `labels`
# dictionary from being overwritten by a tensor.
sample_images, sample_labels = next(dataiter)
plt.figure(figsize=(8, 8))

# Show images
imshow(torchvision.utils.make_grid(sample_images, nrow=8))

## Optimizing parameters

Now we are going to use backpropagation to train the model using data. First, we need a loss function and an optimization routine.

In [None]:
net = Net()
# Move the model to the GPU only when one is present, so the cell also runs
# on CPU-only machines.
if torch.cuda.is_available():
    net.cuda()
net

In [None]:
# Cross-entropy loss, optimized with SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Use the GPU only when one is present instead of assuming it.
training(net, optimizer, criterion, trainloader, testloader, num_epoch=20, logging_window=100,
         experiment_name='Basic', use_gpu=torch.cuda.is_available())

## Exercises
* Play with the `batch_size` parameter and see how it influences training.
* Replace the first 5x5 convolutional filters with two 3x3 filters. How does it affect the number of the model's parameters?
* Augment training data by randomly flipping images before passing them to your network.
* Modify the network architecture to achieve accuracy over 80% on the test set.

# Part 3: Transfer learning
In a typical computer vision project one uses some model pretrained on a large set of data like we saw in the first part. 

We will use such a model and finetune it for our data.

In [None]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [None]:
# Rebuild VGG-19 (batch-norm) and load its weights from the local checkpoint.
# This rebinds `net`; the model is left in its default (training) mode.
net = models.vgg19_bn()
net.load_state_dict(torch.load('/public/models/pytorch/vgg/vgg19_bn-c79401a0.pth'))

In [None]:
# Display the layer structure of the loaded network.
net

## Essentials
* Substitute the classifier part of the network
* Train with lower learning rate
* Freeze lower layers if needed

## Exercise
Implement a classifier that works with CIFAR-10.

In [None]:
# Replace the ImageNet classifier head with a smaller one producing 10 outputs.
net.classifier = nn.Sequential(
    nn.Linear(512, 1024),
    nn.ReLU(inplace=True),
    nn.Dropout(0.5),
    nn.Linear(1024, 10)
)
# torchvision versions whose VGG has an `avgpool` stage pool the feature maps
# to 7x7, which would feed 512*7*7 values into the 512-input head above.
# Pooling to 1x1 keeps the flattened size at 512 on those versions and does
# nothing on versions without the attribute.
if hasattr(net, 'avgpool'):
    net.avgpool = nn.AdaptiveAvgPool2d((1, 1))

In [None]:
# Move the model to the GPU only when one is present, so the cell also runs
# on CPU-only machines.
if torch.cuda.is_available():
    net.cuda()
net

In [None]:
def performance_stats(net, dataloader, use_gpu=False):
    """Computes performance statistics - logloss and accuracy - for a given dataset.

    The network is switched to evaluation mode for the duration of the pass
    (so dropout and batch-norm do not distort the statistics) and restored to
    its previous mode afterwards. No gradients are recorded.

    Args:
        net: model to evaluate.
        dataloader: iterable of ``(images, labels)`` batches.
        use_gpu: move every batch to the GPU before the forward pass.

    Returns:
        ``(accuracy, mean_logloss, total)`` as Python numbers, where
        ``total`` is the number of evaluated examples.

    Raises:
        ZeroDivisionError: if ``dataloader`` yields no examples.
    """
    correct = 0
    total = 0
    logloss = 0.0
    was_training = net.training
    net.eval()
    try:
        # no_grad replaces the removed `volatile` flag: without it every
        # evaluation batch would build an autograd graph.
        with torch.no_grad():
            for images, labels in dataloader:
                if use_gpu:
                    images, labels = images.cuda(), labels.cuda()
                outputs = net(images)
                # Sum (not mean) per batch so the final division by `total`
                # gives the exact per-example average.
                logloss += F.cross_entropy(outputs, labels, reduction='sum').item()
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
    finally:
        net.train(was_training)
    acc = float(correct) / total
    return acc, logloss / total, total

In [None]:
def training(net, optimizer, criterion, trainloader, testloader, num_epoch, logging_window=1000, use_gpu=True, experiment_name=''):
    """Main function for model training.

    Runs ``num_epoch`` passes over ``trainloader``. Every ``logging_window``
    batches within an epoch the mean running loss is sent to the global
    context ``ctx`` and the batch index is printed. After every epoch,
    accuracy and logloss on both loaders are printed and sent to ``ctx``.

    Args:
        net: model to train.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.
        trainloader: iterable of training ``(images, labels)`` batches.
        testloader: iterable of test ``(images, labels)`` batches.
        num_epoch: number of passes over ``trainloader``.
        logging_window: number of batches between running-loss reports.
        use_gpu: move every batch to the GPU before the forward pass.
        experiment_name: prefix of every channel name.
    """
    # Channels need to be reset on another invocation of training() function.
    # At first, a warning will be printed as these channels do not exist.
    for channel in (' training set running loss',
                    ' training set accuracy',
                    ' training set logloss',
                    ' test set accuracy',
                    ' test set logloss'):
        ctx.channel_reset(experiment_name + channel)

    n_iter = 0
    for epoch in range(num_epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            images, labels = data
            if use_gpu:
                images, labels = images.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # .item() extracts the Python number from the 0-dim loss tensor.
            running_loss += loss.item()
            # Count every batch so the x-axis stays the true iteration count
            # even when an epoch is not a multiple of logging_window.
            n_iter += 1
            if i % logging_window == (logging_window - 1):
                ctx.channel_send(experiment_name + ' training set running loss', n_iter, running_loss / logging_window)
                running_loss = 0.0
                print(i)

        # Post epoch statistics
        acc_train, logloss_train, total_train = performance_stats(net, trainloader, use_gpu)
        print('------------------------------')
        print(epoch, acc_train, logloss_train, total_train)
        ctx.channel_send(experiment_name + ' training set accuracy', epoch, acc_train)
        ctx.channel_send(experiment_name + ' training set logloss', epoch, logloss_train)

        acc_test, logloss_test, total_test = performance_stats(net, testloader, use_gpu)
        print(epoch, acc_test, logloss_test, total_test)
        ctx.channel_send(experiment_name + ' test set accuracy', epoch, acc_test)
        ctx.channel_send(experiment_name + ' test set logloss', epoch, logloss_test)

In [None]:
# Batch size for the transfer-learning loaders (rebinds the earlier value of 16).
batch_size = 64

In [None]:
# Same preprocessing as in Part 2: tensor conversion followed by per-channel
# normalization with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [None]:
# Re-create both CIFAR-10 splits; here both calls request a download.
trainset = torchvision.datasets.CIFAR10('/public/cifar/', download=True, train=True, transform=transform)
testset = torchvision.datasets.CIFAR10('/public/cifar/', download=True, train=False, transform=transform)
# Human-readable class names, indexed by label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# Loaders using the new batch size; only the training batches are shuffled.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
# Cross-entropy loss; Adam updates all network parameters with a small
# learning rate, as recommended above for fine-tuning.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.000025)

In [None]:
# Use the GPU only when one is present instead of relying on the use_gpu=True default.
training(net, optimizer, criterion, trainloader, testloader, num_epoch=1, logging_window=100,
         experiment_name="Transfer learning", use_gpu=torch.cuda.is_available())