# High-level PyTorch MNIST Example

In [1]:
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
from torchvision import datasets, transforms
from torch.autograd import Variable
from common.params import *
from common.utils import *

In [2]:
# Record the library versions this notebook was run with (torch first, then numpy)
print("{}\n{}".format(torch.__version__, np.__version__))

0.1.12_2
1.13.1


In [3]:
def create_lenet():
    """Build a new, untrained LeNet-style convolutional network.

    Layout: two 5x5 convolutions (1 -> 20 -> 50 channels), each followed by
    tanh and a 2x2 max-pool, then a 500-unit tanh layer and a 10-way output.

    Returns:
        A fresh ``LenetModel`` (an ``nn.Module``) whose forward pass yields
        log-softmax scores with 10 entries per sample.
    """
    class LenetModel(nn.Module):
        """LeNet variant for single-channel images."""

        def __init__(self):
            super(LenetModel, self).__init__()
            # Convolutional feature extractor
            self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
            self.conv2 = nn.Conv2d(20, 50, kernel_size=5)
            # After both conv+pool stages a 28x28 input is reduced to 4x4
            # spatially, hence 50*4*4 flattened features.
            self.fc1 = nn.Linear(50*4*4, 500)
            self.fc2 = nn.Linear(500, 10)

        def forward(self, x):
            """Run the forward pass and return log-softmax class scores."""
            # Each stage: convolution -> tanh -> 2x2 max-pool
            for conv in (self.conv1, self.conv2):
                activated = F.tanh(conv(x))
                x = F.max_pool2d(activated, kernel_size=2, stride=2)
            # Flatten to (batch, features) for the fully connected layers
            flat = x.view(-1, 50*4*4)
            hidden = F.tanh(self.fc1(flat))
            scores = self.fc2(hidden)
            return F.log_softmax(scores)

    return LenetModel()

In [4]:
def init_model(net=None, lr=None, momentum=None):
    """Create the SGD optimizer used to train a network.

    Args:
        net: Module whose parameters are optimized. When omitted, the
            notebook-level ``model`` is used, so ``init_model()`` keeps
            working exactly as before.
        lr: Learning rate. Defaults to the global ``LR``.
        momentum: Momentum factor. Defaults to the global ``MOMENTUM``.

    Returns:
        An ``optim.SGD`` instance with no weight decay and without Nesterov
        momentum.
    """
    # Fall back to notebook globals only for arguments the caller left out,
    # so the function can also be used without relying on hidden state.
    if net is None:
        net = model
    if lr is None:
        lr = LR
    if momentum is None:
        momentum = MOMENTUM
    opt = optim.SGD(net.parameters(),
                    lr=lr,
                    momentum=momentum,
                    weight_decay=0,
                    nesterov=False)
    return opt

In [5]:
%%time
# Load MNIST through the shared helper, requesting channel-first images
x_train, x_test, y_train, y_test = mnist_for_library(channel_first=True)
# Torch-specific: labels are later wrapped in torch.LongTensor, so cast both
# label arrays to int64
y_train, y_test = (labels.astype(np.int64) for labels in (y_train, y_test))

CPU times: user 236 ms, sys: 208 ms, total: 444 ms
Wall time: 346 ms


In [6]:
%%time
# Instantiate the LeNet network defined by create_lenet()
model = create_lenet()
model.cuda() # Move the network to the GPU; the loops below feed it .cuda() tensors

CPU times: user 1.8 s, sys: 528 ms, total: 2.32 s
Wall time: 2.5 s


In [7]:
# Display the network's layer summary as the cell output
model

LenetModel (
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (800 -> 500)
  (fc2): Linear (500 -> 10)
)

In [8]:
%%time
# Build the SGD optimizer over the parameters of the global `model`
optimizer = init_model()

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 84.6 µs


In [9]:
%%time
# Sets training = True
model.train()
for j in range(EPOCHS):
    for data, target in yield_mb(x_train, y_train, BATCHSIZE):
        # Get samples: wrap the mini-batch as GPU tensors for autograd
        data = Variable(torch.FloatTensor(data).cuda())
        target = Variable(torch.LongTensor(target).cuda())
        # Init: clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forwards: the model's forward pass ends in log_softmax
        output = model(data)
        # Loss: the output is already log-probabilities, so use nll_loss.
        # cross_entropy would apply log_softmax a second time.
        loss = F.nll_loss(output, target)
        # Back-prop
        loss.backward()
        optimizer.step()
    # Log the index of the epoch that just finished
    print(j)

0
1
2
3
4
5
6
7
8
9
10
11
CPU times: user 57.3 s, sys: 4.29 s, total: 1min 1s
Wall time: 1min 2s


In [10]:
%%time
# Test model
# Sets training = False
model.eval()
# Use the builtin int: the np.int alias was removed in NumPy 1.24 and was
# only ever another name for int, so the resulting dtype is unchanged.
y_guess = np.zeros(y_test.shape, dtype=int)
y_truth = np.zeros(y_test.shape, dtype=int)
# NOTE(review): entries never written by the loop stay 0 in BOTH arrays and
# would count as correct predictions. If yield_mb skips a trailing partial
# batch (not visible here), accuracy is slightly inflated - confirm.
c = 0
for data, target in yield_mb(x_test, y_test, BATCHSIZE):
    # Get samples
    data = Variable(torch.FloatTensor(data).cuda())
    # Forwards
    output = model(data)
    # Index of the highest score along dim 1 is the predicted class
    pred = output.data.max(1)[1].cpu().numpy().squeeze()
    # Collect results; take the labels that came with this batch so guesses
    # and truths stay aligned however yield_mb orders its batches
    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred
    y_truth[c*BATCHSIZE:(c+1)*BATCHSIZE] = target
    c += 1

CPU times: user 248 ms, sys: 12 ms, total: 260 ms
Wall time: 164 ms


In [11]:
# Fraction of test samples whose predicted class equals the true label.
# np.mean over the boolean match array is vectorized; the builtin sum() would
# iterate the array one element at a time in Python.
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.9896
