In [1]:

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets.mnist import MNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import torch.nn as nn
from collections import OrderedDict


In [2]:

class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()

        self.convnet = nn.Sequential(OrderedDict([
            ('c1', nn.Conv2d(1, 6, kernel_size=(5, 5))),
            ('relu1', nn.ReLU()),
            ('s2', nn.MaxPool2d(kernel_size=(2, 2), stride=2)),
            ('c3', nn.Conv2d(6, 16, kernel_size=(5, 5))),
            ('relu3', nn.ReLU()),
            ('s4', nn.MaxPool2d(kernel_size=(2, 2), stride=2)),
            ('c5', nn.Conv2d(16, 120, kernel_size=(5, 5))),
            ('relu5', nn.ReLU())
        ]))
        
        self.fc = nn.Sequential(OrderedDict([
            ('f6', nn.Linear(120, 84)),
            ('relu6', nn.ReLU()),
            ('f7', nn.Linear(84, 10)),
            ('sig7', nn.LogSoftmax(dim=-1))
        ]))

    def forward(self, img):
        output = self.convnet(img)
        output = output.view(img.size(0), -1)
        output = self.fc(output)
        return output

In [3]:
net = LeNet5()
net.cuda()

LeNet5(
  (convnet): Sequential(
    (c1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (relu1): ReLU()
    (s2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (c3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (relu3): ReLU()
    (s4): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (c5): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
    (relu5): ReLU()
  )
  (fc): Sequential(
    (f6): Linear(in_features=120, out_features=84, bias=True)
    (relu6): ReLU()
    (f7): Linear(in_features=84, out_features=10, bias=True)
    (sig7): LogSoftmax()
  )
)

In [4]:
net.convnet._modules

OrderedDict([('c1', Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))),
             ('relu1', ReLU()),
             ('s2',
              MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)),
             ('c3', Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))),
             ('relu3', ReLU()),
             ('s4',
              MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)),
             ('c5', Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))),
             ('relu5', ReLU())])

In [5]:

data_train = MNIST('./data/mnist',
                   download=True,
                   transform=transforms.Compose([
                       transforms.Resize((32, 32)),
                       transforms.ToTensor()]))
data_test = MNIST('./data/mnist',
                  train=False,
                  download=True,
                  transform=transforms.Compose([
                      transforms.Resize((32, 32)),
                      transforms.ToTensor()]))
data_train_loader = DataLoader(data_train, batch_size=256, shuffle=True, num_workers=8)
data_test_loader = DataLoader(data_test, batch_size=1024, num_workers=8)

In [17]:

criterion = nn.NLLLoss()
criterion.cuda()
optimizer = optim.Adam(net.parameters(), lr=2e-3)

def train(epoch):
    global cur_batch_win
    net.train()
    loss_list, batch_list = [], []
    for i, (images, labels) in enumerate(data_train_loader):
        optimizer.zero_grad()
        images, labels = images.cuda(),labels.cuda()        #move data to cuda
        output = net(images)

        loss = criterion(output, labels)

        loss_list.append(loss.detach().cpu().item())
        batch_list.append(i+1)

        if i % 100 == 0:
            print('Train - Epoch %d, Batch: %d, Loss: %f' % (epoch, i, loss.detach().cpu().item()))
        loss.backward()
        optimizer.step()


def test():
    net.eval()
    total_correct = 0
    avg_loss = 0.0
    for i, (images, labels) in enumerate(data_test_loader):
        images, labels = images.cuda(),labels.cuda()   #move data to cuda
        output = net(images)
        avg_loss += criterion(output, labels).sum()
        pred = output.detach().max(1)[1]
        total_correct += pred.eq(labels.view_as(pred)).sum()

    avg_loss /= len(data_test)
    print('Test Avg. Loss: %f, Accuracy: %f' % (avg_loss.detach().cpu().item(), float(total_correct) / len(data_test)))


def train_and_test(epoch):
    train(epoch)
    test()


def main():
    for e in range(1, 10):
        train_and_test(e)

In [18]:
main()

Train - Epoch 1, Batch: 0, Loss: 0.002769
Train - Epoch 1, Batch: 100, Loss: 0.003421
Train - Epoch 1, Batch: 200, Loss: 0.000152
Test Avg. Loss: 0.000070, Accuracy: 0.986800
Train - Epoch 2, Batch: 0, Loss: 0.001530
Train - Epoch 2, Batch: 100, Loss: 0.000784
Train - Epoch 2, Batch: 200, Loss: 0.011465
Test Avg. Loss: 0.000073, Accuracy: 0.985700
Train - Epoch 3, Batch: 0, Loss: 0.009400
Train - Epoch 3, Batch: 100, Loss: 0.008616
Train - Epoch 3, Batch: 200, Loss: 0.004830
Test Avg. Loss: 0.000074, Accuracy: 0.986500
Train - Epoch 4, Batch: 0, Loss: 0.033089
Train - Epoch 4, Batch: 100, Loss: 0.000594
Train - Epoch 4, Batch: 200, Loss: 0.001101
Test Avg. Loss: 0.000067, Accuracy: 0.987900
Train - Epoch 5, Batch: 0, Loss: 0.007269
Train - Epoch 5, Batch: 100, Loss: 0.000742
Train - Epoch 5, Batch: 200, Loss: 0.003261
Test Avg. Loss: 0.000074, Accuracy: 0.987100
Train - Epoch 6, Batch: 0, Loss: 0.000220
Train - Epoch 6, Batch: 100, Loss: 0.010782
Train - Epoch 6, Batch: 200, Loss: 0.00