In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [2]:
# download and transform train dataset
train_loader = torch.utils.data.DataLoader(datasets.MNIST('../mnist_data', 
                                                          download=True, 
                                                          train=True,
                                                          transform=transforms.Compose([
                                                              transforms.ToTensor(), # first, convert image to PyTorch tensor
                                                              transforms.Normalize((0.1307,), (0.3081,)) # normalize inputs
                                                          ])), 
                                           batch_size=10, 
                                           shuffle=True)

# download and transform test dataset
test_loader = torch.utils.data.DataLoader(datasets.MNIST('../mnist_data', 
                                                          download=True, 
                                                          train=False,
                                                          transform=transforms.Compose([
                                                              transforms.ToTensor(), # first, convert image to PyTorch tensor
                                                              transforms.Normalize((0.1307,), (0.3081,)) # normalize inputs
                                                          ])), 
                                           batch_size=10, 
                                           shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [3]:
class CNNClassifier(nn.Module):
    """Custom module for a simple convnet classifier"""
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.dropout = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
    
    def forward(self, x):
        # input is 28x28x1
        # conv1(kernel=5, filters=10) 28x28x10 -> 24x24x10
        # max_pool(kernel=2) 24x24x10 -> 12x12x10
        
        # Do not be afraid of F's - those are just functional wrappers for modules form nn package
        # Please, see for yourself - http://pytorch.org/docs/_modules/torch/nn/functional.html
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        
        # conv2(kernel=5, filters=20) 12x12x20 -> 8x8x20
        # max_pool(kernel=2) 8x8x20 -> 4x4x20
        x = F.relu(F.max_pool2d(self.dropout(self.conv2(x)), 2))
        
        # flatten 4x4x20 = 320
        x = x.view(-1, 320)
        
        # 320 -> 50
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        
        # 50 -> 10
        x = self.fc2(x)
        
        # transform to logits
        return F.log_softmax(x)

In [4]:
# create classifier and optimizer objects
clf = CNNClassifier()
opt = optim.SGD(clf.parameters(), lr=0.01, momentum=0.5)

In [5]:
loss_history = []
acc_history = []

In [6]:
def train(epoch):
    clf.train() # set model in training mode (need this because of dropout)
    
    # dataset API gives us pythonic batching 
    for batch_id, (data, label) in enumerate(train_loader):
        data = Variable(data)
        target = Variable(label)
        
        # forward pass, calculate loss and backprop!
        opt.zero_grad()
        preds = clf(data)
        loss = F.nll_loss(preds, target)
        loss.backward()
        loss_history.append(loss.data[0])
        opt.step()
        
        if batch_id % 100 == 0:
            print(loss.data[0])

In [7]:
def test(epoch):
    clf.eval() # set model in inference mode (need this because of dropout)
    test_loss = 0
    correct = 0
    
    for data, target in test_loader:
        data = Variable(data, volatile=True) 
        target = Variable(target)
        
        output = clf(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss = test_loss
    test_loss /= len(test_loader) # loss function already averages over batch size
    accuracy = 100. * correct / len(test_loader.dataset)
    acc_history.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))

In [8]:
for epoch in range(0, 3):
    print("Epoch %d" % epoch)
    train(epoch)
    test(epoch)

Epoch 0


  


tensor(2.3320)
tensor(2.0945)
tensor(1.4759)
tensor(0.9243)
tensor(1.5738)
tensor(1.0186)
tensor(0.3942)
tensor(0.6823)
tensor(0.3542)
tensor(1.2541)
tensor(0.7162)
tensor(0.4104)
tensor(0.9693)
tensor(0.5080)
tensor(0.1244)
tensor(0.3051)
tensor(0.5918)
tensor(0.4800)
tensor(0.3910)
tensor(0.0811)
tensor(0.9574)
tensor(0.9650)
tensor(0.1668)
tensor(0.1952)
tensor(0.1431)
tensor(0.5419)
tensor(0.6860)
tensor(0.1208)
tensor(0.0740)
tensor(0.1246)
tensor(0.2140)
tensor(0.1329)
tensor(0.0339)
tensor(0.6213)
tensor(0.4672)
tensor(0.2834)
tensor(1.0903)
tensor(0.3067)
tensor(0.2296)
tensor(0.4960)
tensor(0.1166)
tensor(0.0245)
tensor(0.0955)
tensor(0.2577)
tensor(0.0628)
tensor(0.1854)
tensor(0.0492)
tensor(0.3638)
tensor(0.2583)
tensor(0.2217)
tensor(0.1675)
tensor(0.1663)
tensor(0.5322)
tensor(0.0189)
tensor(0.4190)
tensor(0.3077)
tensor(1.5226)
tensor(0.2221)
tensor(0.0364)
tensor(0.0095)


  import sys
  # This is added back by InteractiveShellApp.init_path()



Test set: Average loss: 0.0804, Accuracy: 9744/10000 (97%)

Epoch 1
tensor(0.0097)
tensor(0.0343)
tensor(0.2119)
tensor(1.6914)
tensor(0.2777)
tensor(0.2914)
tensor(0.1387)
tensor(0.3247)
tensor(0.0523)
tensor(0.3782)
tensor(0.0048)
tensor(1.6521)
tensor(0.3565)
tensor(0.3413)
tensor(0.8521)
tensor(0.0377)
tensor(0.2722)
tensor(0.1259)
tensor(0.1051)
tensor(0.1395)
tensor(0.0019)
tensor(0.0928)
tensor(0.5927)
tensor(0.2855)
tensor(0.0038)
tensor(0.3606)
tensor(0.2381)
tensor(0.2725)
tensor(0.5244)
tensor(0.6305)
tensor(0.1917)
tensor(0.4355)
tensor(0.0885)
tensor(1.0223)
tensor(0.1195)
tensor(0.0842)
tensor(0.1960)
tensor(0.3575)
tensor(0.0805)
tensor(0.1442)
tensor(0.2680)
tensor(0.0817)
tensor(0.0462)
tensor(0.0666)
tensor(0.5805)
tensor(0.0757)
tensor(0.3727)
tensor(0.2512)
tensor(0.1721)
tensor(0.4492)
tensor(0.2136)
tensor(0.1700)
tensor(0.0741)
tensor(0.2729)
tensor(0.0575)
tensor(0.0332)
tensor(1.6867)
tensor(0.2457)
tensor(0.0166)
tensor(0.1318)

Test set: Average loss: 0.0655