<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Build-a-DataLoader" data-toc-modified-id="Build-a-DataLoader-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Build a <code>DataLoader</code></a></span></li><li><span><a href="#Building-a-Convnet" data-toc-modified-id="Building-a-Convnet-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Building a Convnet</a></span></li><li><span><a href="#Training-model" data-toc-modified-id="Training-model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Training model</a></span></li></ul></div>

In [1]:
import numpy as np 
import pandas as pd 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable

from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv('../input/train.csv')
print(df.shape)

(42000, 785)


In [3]:
y = df['label'].values
X = df.drop(['label'],1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

In [4]:
print(y_test.shape)

(6300,)


## Build a `DataLoader`

In [5]:
BATCH_SIZE = 32

torch_X_train = torch.from_numpy(X_train).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long

# create feature and targets tensor for test set.
torch_X_test = torch.from_numpy(X_test).type(torch.LongTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) # data type is long

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(torch_X_train, torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test, torch_y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = BATCH_SIZE, shuffle = False)
test_loader = torch.utils.data.DataLoader(test, batch_size = BATCH_SIZE, shuffle = False)

In [6]:
torch_X_train

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])

In [7]:
torch_X_train.dtype

torch.int64

## Building a Convnet

In order to know the input dimensions of the dense layers we need to find the ouptut shape of the convolution and maxpooling layers

In [19]:

conv1 = nn.Conv2d(in_channels=1, 
                  out_channels=20,
                  kernel_size=5,
                  stride=1)
        
conv2 = nn.Conv2d(in_channels=20, 
                  out_channels=50,
                  kernel_size=5,
                  stride=1)

n_batch = 10
x = torch.tensor(X_train[0:n_batch]).float()
x = x.reshape(n_batch,1,28,28)

print(x.shape)
x = F.relu(conv1(x))
print(x.shape)
x = F.max_pool2d(x, 2, 2)
print(x.shape)
x = F.relu(conv2(x))
print(x.shape)
x = F.max_pool2d(x, 2, 2)
print(x.shape) # (((28-4)/2)-4)/2

torch.Size([10, 1, 28, 28])
torch.Size([10, 20, 24, 24])
torch.Size([10, 20, 12, 12])
torch.Size([10, 50, 8, 8])
torch.Size([10, 50, 4, 4])


- Since there is no padding each time a kernel is applied the output volume will be shrinked substracting kernel_size -1 to its height and width.

- Each maxpool layer will divide by 2 the volume size


In summary

- input size        `n_batchx1x28x28`
- Output conv1:     `n_batch x n_filters_conv1 x 24 x 24`, the 24 comes from 28-(5-1)
- Output maxpool1:  `n_batch x n_filters_conv1 x 12 x 12`, the 12 comes from 24/2   
- Output conv2:     `n_batch x n_filters_conv2 x 8 x 8`, the 8 comes from 12 - (5-1)
- Output maxpool2:  `n_batch x n_filters_conv2 x 4 x 4`, the 4 comes from 8/2



In [9]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """
    ConvNet -> Max_Pool -> RELU -> ConvNet -> Max_Pool -> RELU 
            -> FC -> RELU -> FC -> SOFTMAX
    """
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, 
                               out_channels=20,
                               kernel_size=5,
                               stride=1)
        
        # H x W x C conv1  28 x 28 x 20
        self.conv2 = nn.Conv2d(in_channels=20, 
                               out_channels=50,
                               kernel_size=5,
                               stride=1)
        
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    
# RuntimeError: Given groups=1, 
# weight of size [6, 3, 5, 5],  expected input[10, 1, 28, 28]
# to have 3 channels, but got 1 channels instead

In [10]:
model = Net()
print(model)

Net(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)


## Training model

In [12]:
import torch.optim as optim
loss_func   = nn.CrossEntropyLoss()
optimizer   = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
var_X_batch = torch.Tensor(X_train[0:10]).float()
var_X_batch.shape

torch.Size([10, 784])

Note that the model takes as input a tensor of size [n_batch, 1, 28,28]

In [13]:
var_X_batch = torch.Tensor(X_train[0:10]).float()
var_X_batch = var_X_batch.reshape(10,1,28,28)
var_X_batch.shape

torch.Size([10, 1, 28, 28])

In [14]:
def fit(model, train_loader, optimizer, loss_func, n_epochs):
    model.train()
    n_batch = train_loader.batch_size
    for epoch in range(n_epochs):
        correct = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            n_batch = len(y_batch)
            var_X_batch = Variable(X_batch).float()
            var_X_batch = var_X_batch.reshape(n_batch,1,28,28)
            var_y_batch = Variable(y_batch)
            optimizer.zero_grad()
            output = model(var_X_batch)
            loss_minibatch = loss_func(output, var_y_batch)
            loss_minibatch.backward()
            optimizer.step()

            # Total correct predictions
            predicted = torch.max(output.data, 1)[1] 
            correct += (predicted == var_y_batch).sum()
            #print(correct)
            if batch_idx % 50 == 0:
                print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                    epoch, batch_idx*len(X_batch), len(train_loader.dataset),
                    100.*batch_idx / len(train_loader),
                    loss_minibatch.data.item(),
                    float(correct*100) / float(n_batch*(batch_idx+1))))
                

In [15]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
fit(model, train_loader, optimizer, loss_func, n_epochs=2)



In [16]:
def evaluate(model):
    #model = mlp
    correct = 0 
    for test_imgs, test_labels in test_loader:
        test_imgs = Variable(test_imgs).float()
        test_imgs = test_imgs.reshape(len(test_labels),1,28,28)
        output = model(test_imgs)
        predicted = torch.max(output,1)[1]
        correct += (predicted == test_labels).sum()
    print("Test accuracy:{:.3f}% ".format( float(correct) / (len(test_loader)*BATCH_SIZE)))

In [17]:
evaluate(model)

Test accuracy:0.975% 


Another architecture

In [18]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        #self.conv1 = nn.Conv2d(1, 4, kernel_size = 3, stride=1, padding=1)
        self.pool  = nn.MaxPool2d(2, 2)
        #self.conv2 = nn.Conv2d(4, 4, kernel_size = 3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(6, 16, 5)
        #import pdb;pdb.set_trace()
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x
    

