# Intro to Convolutional Neural Networks with PyTorch
## Applied Machine Learning
### SmartGateML

In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

Le'ts see if we have any GPUs at our disposal

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [3]:
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data/',
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data/',
                                          train=False, 
                                          transform=transforms.ToTensor(),
                                          download=True)

Let's see what we've got

In [4]:
print(train_dataset)
print(test_dataset)

Dataset MNIST
    Number of datapoints: 60000
    Split: train
    Root Location: ./data/
    Transforms (if any): ToTensor()
    Target Transforms (if any): None
Dataset MNIST
    Number of datapoints: 10000
    Split: test
    Root Location: ./data/
    Transforms (if any): ToTensor()
    Target Transforms (if any): None


In [5]:
# Hyper parameters. Please define them for me

num_epochs = 5 
batch_size = 50
num_classes = 10
lr = 0.001

In [6]:
# Data loader: Combines a dataset and a sampler, and provides a convenient iterators over the dataset.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

Let's write a Convolutional Neural Network with two convolutional layers, followed by two fully connected layers.

nn.Module is the base class for all neural network modules in PyTorch. Our model should be a subclass of nn.Module this class.

class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, ...)
* in_channels (int): Number of channels in the input image
* out_channels (int): Number of channels produced by the convolution
* kernel_size (int or tuple): Size of the convolving kernel
* stride (int or tuple, optional): Stride of the convolution. Default: 1
* padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0

This might help https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md to get intuition.

**Please write a ConvNet module which contains**
* 2 convolutioal layers with
    * 16 filters/kernels, each having size 5 
    * stride = 1
    * padding = 2
* each is followed bynormalise by 
    * batch normalisation
    * ReLU non-linearity
    * MaxPool layer with kernel_size=2, stride=2

* 2 fully connected layers
    1. x -> 64, followed by ReLU and Dropout (50% p)
    2. 64 -> y
    
you may want to make use of nn.Sequential()    
    
Here is why one should use Batch-Norm:
* Improves gradient flow, used on very deep models (Resnet need this)
* Allow higher learning rates
* Reduce dependency on initialization
* Gives some kind of regularization 

In [11]:
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        
        self.layer1 = nn.Sequential(
          nn.Conv2d(1, 16, 3, 1, 1),
          nn.BatchNorm2d(16),
          nn.ReLU(),
          nn.MaxPool2d(2, 2))
        
        self.layer2 = nn.Sequential(
          nn.Conv2d(16, 32, 3, 1, 1),
          nn.BatchNorm2d(32),
          nn.ReLU(),
          nn.MaxPool2d(2, 2))

        self.fc1 = nn.Linear(7*7*32, 64)
        self.drop = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(64, 10)
    def forward(self, x):
        
        out = self.layer1(x)
        out = self.layer2(out)
        
        # out.reshape(-1, 7*7*32)
        out = out.view(-1, 7*7*32)
        
        out = F.relu(self.fc1(out))
        out = self.drop(out)
        out = F.relu(self.fc2(out))
        return out

In [12]:
model = ConvNet(num_classes).to(device)
print(model)

ConvNet(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=1568, out_features=64, bias=True)
  (drop): Dropout(p=0.5)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)


In [13]:
# Loss and optimizer
# Use CrossEntropyLoss and Adam
criterion = nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

In [14]:
from torch.autograd import Variable

# Train the model
total_step = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        # Make consistent with the current device
        images = images.to(device)
        labels = labels.to(device)
        
        # x, target = Variable(images), Variable(labels)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % batch_size == 0:
            print ('Epoch # [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch # [1/5], Step [50/1200], Loss: 1.3791
Epoch # [1/5], Step [100/1200], Loss: 1.0802
Epoch # [1/5], Step [150/1200], Loss: 0.7579
Epoch # [1/5], Step [200/1200], Loss: 0.5601
Epoch # [1/5], Step [250/1200], Loss: 0.3120
Epoch # [1/5], Step [300/1200], Loss: 0.6409
Epoch # [1/5], Step [350/1200], Loss: 0.1891
Epoch # [1/5], Step [400/1200], Loss: 0.2286
Epoch # [1/5], Step [450/1200], Loss: 0.2804
Epoch # [1/5], Step [500/1200], Loss: 0.4529
Epoch # [1/5], Step [550/1200], Loss: 0.2912
Epoch # [1/5], Step [600/1200], Loss: 0.1741
Epoch # [1/5], Step [650/1200], Loss: 0.1638
Epoch # [1/5], Step [700/1200], Loss: 0.1219
Epoch # [1/5], Step [750/1200], Loss: 0.2013
Epoch # [1/5], Step [800/1200], Loss: 0.3407
Epoch # [1/5], Step [850/1200], Loss: 0.1488
Epoch # [1/5], Step [900/1200], Loss: 0.1614
Epoch # [1/5], Step [950/1200], Loss: 0.1901
Epoch # [1/5], Step [1000/1200], Loss: 0.1189
Epoch # [1/5], Step [1050/1200], Loss: 0.1456
Epoch # [1/5], Step [1100/1200], Loss: 0.1612
Epoch # 

In [42]:
# Test the model

model.eval()  

# This sets the module in evaluation mode, which affects certain modules (e.g. BatchNorm, Dropout). 

with torch.no_grad(): # Why do we need this?
    
    correct = 0
    total = 0
    
    for images, labels in test_loader:
        out = model(images)
        _, p = torch.max(out.data, dim=1)
        
        total += labels.data.size()[0]
        
        correct += (p == labels.data).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))


Test Accuracy of the model on the 10000 test images: 98.83 %


In [63]:
# Lets print a prediction
pilTrans = torchvision.transforms.ToPILImage()

# we're predicting on 50-sized batch, so our input to model should have 50x1x28x28 size
def predict_single_image_label(test_image_idx: int) -> torch.FloatTensor:
    test_image, test_target = test_dataset[test_image_idx]
    
    print("test_target: ", test_target_eighty)
    print("test_image: ", test_image_eighty.size())
    pilImg = pilTrans(test_image)
    pilImg.show()
    
    test_image.unsqueeze_(-1)
    test_image = test_image.expand(1,28,28,50)
    test_image = test_image.transpose(3, 0)
    test_image = test_image.transpose(3, 1)
    test_image = test_image.transpose(3, 2)
    # print("test_image as batch: ", test_image.size())

    out = model(test_image)
    _, predict = torch.max(out.data, dim=1)
    return predict[0]

print("test_prediction: ", predict_single_image_label(79))
print("test_prediction: ", predict_single_image_label(5))

test_target:  tensor(7)
test_image:  torch.Size([50, 1, 28, 28])
test_prediction:  tensor(7)
test_target:  tensor(7)
test_image:  torch.Size([50, 1, 28, 28])
test_prediction:  tensor(1)
