In [1]:
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm, trange

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

from torchsummary import summary
from torch.autograd import Variable

from extra.utils import *

In [2]:
batch_size = 128
torch.manual_seed(1337)

<torch._C.Generator at 0x7fc3bbde5af0>

In [5]:
from models.custom.mnist import *

In [6]:
model = MNIST()
model

MNIST(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=1568, out_features=10, bias=True)
)

In [12]:
torch.cuda.is_available()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    model = model.cuda()
    cudnn.benchmark = True

In [13]:
summary(model, (1, 28,28), batch_size=batch_size, device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [128, 16, 28, 28]             160
       BatchNorm2d-2          [128, 16, 28, 28]              32
              ReLU-3          [128, 16, 28, 28]               0
         MaxPool2d-4          [128, 16, 14, 14]               0
            Conv2d-5          [128, 32, 14, 14]           4,640
       BatchNorm2d-6          [128, 32, 14, 14]              64
              ReLU-7          [128, 32, 14, 14]               0
         MaxPool2d-8            [128, 32, 7, 7]               0
            Linear-9                  [128, 10]          15,690
Total params: 20,586
Trainable params: 20,586
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.38
Forward/backward pass size (MB): 59.73
Params size (MB): 0.08
Estimated Total Size (MB): 60.19
-------------------------------------------

In [14]:
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,))
])

trainset = torchvision.datasets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=4)

testset = torchvision.datasets.MNIST(root='./data/',
                          train=False,
                          transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=4)

classes = ('0', '1', '2', '3', '4',
           '5', '6', '7', '8', '9')

In [16]:
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001,
#                       momentum=0.9, weight_decay=5e-4)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [17]:
def train_nn(epoch):
    model.train()
    with tqdm(trainloader, unit="batch") as tepoch:
        for data, target in tepoch:
            tepoch.set_description(f"Train epoch {epoch}")

            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            _, predictions = output.max(1)
            loss = criterion(output, target)
            correct = predictions.eq(target).sum().item()
            accuracy = (predictions == target).float().mean()

            loss.backward()
            optimizer.step()

            tepoch.set_postfix(loss=loss.item(), accuracy=100. * accuracy.item())

In [18]:
def test_nn(epoch):
    model.eval()
    with torch.no_grad():
        with tqdm(testloader, unit="batch") as tepoch:
            for data, target in tepoch:
                tepoch.set_description(f"Test epoch {epoch}")
                
                data, target = data.to(device), target.to(device)
                output = model(data)
                _, predictions = output.max(1)
                loss = criterion(output, target)
                correct = predictions.eq(target).sum().item()
                accuracy = (predictions == target).float().mean()

                tepoch.set_postfix(loss=loss.item(), accuracy=100. * accuracy.item())

In [19]:
for epoch in trange(0, 12): #start_epoch, start_epoch+200
    train_nn(epoch)
    test_nn(epoch)
    scheduler.step()

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

  0%|          | 0/469 [00:00<?, ?batch/s]

  0%|          | 0/79 [00:00<?, ?batch/s]

In [20]:
model.eval()
correct = 0
total = 0

for images, labels in testloader:
    images, labels = images.to(device), labels.to(device)
    images = Variable(images)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    
print("Test accuracy 10000 test images  = %f %%' % (100 * correct/total))

test Accuracy 10000 test images  = 98.879997 %
