# MNIST PyTorch sandbox

In [2]:
import torch

# neural networks module, used to define models
import torch.nn as nn
import torch.nn.functional as F

# contains update rules for learnable parameters (e.g. SGD)
import torch.optim as optim

# common data loaders for image datasets
from torchvision import datasets, transforms

# automatic differentiation, Variable - basic building block with backprop
from torch.autograd import Variable

In [3]:
class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args = AttrDict({
    'batch_size': 64,
    'test_batch_size': 1000,
    'epochs': 10,
    'lr': 0.01,
    'momentum': 0.5,
    'no-cuda': True,
    'cuda': False,
    'seed': 1,
    'log_interval': 100 # per num batches
})

### Defining neural network

In [4]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2drop = nn.Dropout2d()
        
        # fc - linear transformation: Ax + b
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)
    
    def forward(self, x):
        # conv -> pool -> activation
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        # conv (with Dropout) -> pool -> activation
        x = F.relu(F.max_pool2d(self.conv2drop(x), kernel_size=2))
        x = x.view(-1, 320)
        # fc -> activation
        x = F.relu(self.fc1(x))
        # apply dropout?
        x = F.dropout(x, training=self.training)
        # fc -> activation
        x = F.relu(self.fc2(x))
        return F.log_softmax(x, dim=1)

### Data fetchers

In [5]:
kwargs = {} # important!

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)

### Init

In [6]:
model = Network()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

In [7]:
print('Model:', model)
print('Parameters updater:', optimizer)

Model: Network(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2drop): Dropout2d(p=0.5)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)
Parameters updater: <torch.optim.sgd.SGD object at 0x7f7abcf22d30>


In [18]:
model.__dict__

{'_backend': <torch.nn.backends.thnn.THNNFunctionBackend at 0x7f7abd261940>,
 '_backward_hooks': OrderedDict(),
 '_buffers': OrderedDict(),
 '_forward_hooks': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_modules': OrderedDict([('conv1',
               Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))),
              ('conv2', Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))),
              ('conv2drop', Dropout2d(p=0.5)),
              ('fc1', Linear(in_features=320, out_features=50, bias=True)),
              ('fc2', Linear(in_features=50, out_features=10, bias=True))]),
 '_parameters': OrderedDict(),
 'training': True}

### Methods

In [8]:
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # wrap in Variable
        data, target = Variable(data), Variable(target)
        
        # clean gradients
        optimizer.zero_grad()
        # forward pass
        output = model(data)
        # compute loss
        loss = F.nll_loss(output, target)
        # compute gradients
        loss.backward()
        # update model parameters
        optimizer.step()
        
        # logging
        if batch_idx % args.log_interval == 0:
            print('Train loss: {:.6f}'.format(
                loss.data[0]
            ))

In [9]:
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        
        # get result
        output = model(data)
        # sum up batch loss
        test_loss += F.nll_loss(output, target, size_average=False).data[0]
        # index of max log probability
        max_log_prob_idx = output.data.max(1, keepdim=True)[1]
        # whatever, seems to be somewhat accuracy number
        correct += max_log_prob_idx.eq(target.data.view_as(max_log_prob_idx)).long().cpu().sum()
        
    test_loss /= len(test_loader.dataset)
    print('Test set accuracy: {:.6f}'.format(correct / len(test_loader.dataset)))

In [10]:
args.epochs = 5
args.log_interval = 200

In [11]:
torch.manual_seed(args.seed)

<torch._C.Generator at 0x7f7ac826dcf0>

In [13]:
for i in range(args.epochs):
    print('Epoch: {}'.format(i))
    train(i+1)
    test()

Epoch: 0


RuntimeError: Assertion `THIndexTensor_(size)(target, 0) == batch_size' failed.  at /pytorch/torch/lib/THNN/generic/ClassNLLCriterion.c:79