In [1]:
import torch
import torchvision
from torch import nn, optim

from torchsummary import summary

In [2]:
batch_size = 32
epoch = 30
learning_rate = 0.01

In [3]:
trans = torchvision.transforms.ToTensor()

train_data = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST(
            'mnist_data', train=True, download=True, transform=trans
            ), batch_size=batch_size
            )
val_data = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST(
            'mnist_data', train=False, download=True, transform=trans
            ), batch_size=batch_size)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_data\MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting mnist_data\MNIST\raw\train-images-idx3-ubyte.gz to mnist_data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_data\MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting mnist_data\MNIST\raw\train-labels-idx1-ubyte.gz to mnist_data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_data\MNIST\raw\t10k-images-idx3-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting mnist_data\MNIST\raw\t10k-images-idx3-ubyte.gz to mnist_data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_data\MNIST\raw\t10k-labels-idx1-ubyte.gz


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting mnist_data\MNIST\raw\t10k-labels-idx1-ubyte.gz to mnist_data\MNIST\raw



In [4]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1)
        self.maxpool1 = nn.MaxPool2d((2,2))
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1)
        self.maxpool2 = nn.MaxPool2d((2,2))
        
        self.tanh = nn.Tanh()
        self.linear1 = nn.Linear(150, 10)

    def forward(self, x):
        x = self.maxpool1(self.tanh(self.conv1(x)))
        x = self.maxpool2(self.tanh(self.conv2(x)))
        x = x.view(x.shape[0], -1)
        x = self.linear1(x)
        return x

In [5]:
def validate(model, data):
    total = 0
    correct = 0
    for i, (images, labels) in enumerate(data):
        y_pred = model(images)
        value, pred = torch.max(y_pred, 1)
        total += y_pred.size(0)
        correct += torch.sum(pred == labels)
    return correct * 100 / total

In [6]:
convnet = ConvNet()
summary(convnet, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 26, 26]              30
              Tanh-2            [-1, 3, 26, 26]               0
         MaxPool2d-3            [-1, 3, 13, 13]               0
            Conv2d-4            [-1, 6, 11, 11]             168
              Tanh-5            [-1, 6, 11, 11]               0
         MaxPool2d-6              [-1, 6, 5, 5]               0
            Linear-7                   [-1, 10]           1,510
Total params: 1,708
Trainable params: 1,708
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.05
Params size (MB): 0.01
Estimated Total Size (MB): 0.06
----------------------------------------------------------------


In [7]:
%%time

optimizer = optim.Adam(convnet.parameters(), lr=learning_rate)
cross_entropy = nn.CrossEntropyLoss()

for n in range(epoch):
    for i, (images, labels) in enumerate(train_data):
        optimizer.zero_grad()
        prediction = convnet(images)
        loss = cross_entropy(prediction, labels)
        loss.backward()
        optimizer.step()
    accuracy = float(validate(convnet, val_data))
    print("Epoch:", n+1, "Loss: ", float(loss.data), "Val. Accuracy:", accuracy)

Epoch: 1 Loss:  0.12514452636241913 Val. Accuracy: 95.05000305175781
Epoch: 2 Loss:  0.07508008927106857 Val. Accuracy: 96.16000366210938
Epoch: 3 Loss:  0.02775716222822666 Val. Accuracy: 96.37999725341797
Epoch: 4 Loss:  0.02378271147608757 Val. Accuracy: 96.7300033569336
Epoch: 5 Loss:  0.06893365830183029 Val. Accuracy: 96.51000213623047
Epoch: 6 Loss:  0.06561765819787979 Val. Accuracy: 96.37999725341797
Epoch: 7 Loss:  0.018896516412496567 Val. Accuracy: 96.69000244140625
Epoch: 8 Loss:  0.035781122744083405 Val. Accuracy: 96.72000122070312
Epoch: 9 Loss:  0.020696386694908142 Val. Accuracy: 96.69000244140625
Epoch: 10 Loss:  0.04571831226348877 Val. Accuracy: 96.72000122070312
Epoch: 11 Loss:  0.04973458871245384 Val. Accuracy: 96.80999755859375
Epoch: 12 Loss:  0.04461900144815445 Val. Accuracy: 96.68000030517578
Epoch: 13 Loss:  0.09133164584636688 Val. Accuracy: 96.72000122070312
Epoch: 14 Loss:  0.1170617863535881 Val. Accuracy: 96.88999938964844
Epoch: 15 Loss:  0.062706790

We can see that adding pooling layers reduced number parameters and increased the validation accuracy.