In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from utils import AverageMeter, ProgressMeter

In [2]:
class Model(nn.Module):
    """Small convolutional classifier: three 3x3 convolutions, then two linear layers.

    ``fc1`` takes 576 flattened features, i.e. the 64 x 3 x 3 map that the
    convolution/pooling stack produces for a 1 x 28 x 28 input.

    NOTE(review): no activation function is applied between layers, so apart
    from the two max-pooling steps the network is a chain of affine maps.
    Confirm whether that is intentional before relying on its capacity.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: channel widths 1 -> 32 -> 64 -> 64, no padding.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1)
        # Classifier head: 576 flattened features -> 64 -> 10 outputs.
        self.fc1 = nn.Linear(in_features=576, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Map a 4-D input batch to a ``(batch, 10)`` tensor of raw scores."""
        features = F.max_pool2d(self.conv1(x), kernel_size=2)
        features = F.max_pool2d(self.conv2(features), kernel_size=2)
        features = self.conv3(features)
        # Unpacking four sizes keeps the requirement that the input is batched.
        n_samples, _, _, _ = features.shape
        flat = features.view(n_samples, -1)
        return self.fc2(self.fc1(flat))

In [3]:
from torchsummary import summary

# Pick the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = Model()
model = model.to(device)
# Print the per-layer output shapes and parameter counts for a 1x28x28 input.
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             320
            Conv2d-2           [-1, 64, 11, 11]          18,496
            Conv2d-3             [-1, 64, 3, 3]          36,928
            Linear-4                   [-1, 64]          36,928
            Linear-5                   [-1, 10]             650
Total params: 93,322
Trainable params: 93,322
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.23
Params size (MB): 0.36
Estimated Total Size (MB): 0.59
----------------------------------------------------------------


In [4]:
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: 2-D score tensor; the k highest entries along dimension 1 are
            taken as the predictions for each row.
        target: label tensor; ``target.size(0)`` is used as the batch size.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element float tensor per k, holding the
        percentage of rows whose target appears among the k best predictions.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best scores per row, transposed to (maxk, batch).
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape() instead of view(): the comparison result can inherit the
            # transposed (non-contiguous) layout of ``pred``, in which case
            # view(-1) raises a RuntimeError for k > 1.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [5]:
def train(train_loader, model, optimizer, device, criteria):
    """Run one optimisation pass over ``train_loader``, updating ``model`` in place.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: module being trained.
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batches are moved to before the forward pass.
        criteria: loss callable invoked as ``criteria(pred, target)``.

    Returns:
        Tuple ``(losses, top1)`` of the ``AverageMeter`` objects that were
        updated with the per-batch loss and top-1 accuracy.
    """
    # evaluate() leaves the model in eval mode, so switch back explicitly;
    # otherwise every epoch after the first would train in eval mode.
    model.train()

    # AverageMeter / ProgressMeter come from the local ``utils`` module.
    losses = AverageMeter("Loss", ':.4e')
    top1 = AverageMeter("Acc", ":6.2f")
    progress = ProgressMeter(len(train_loader), losses, top1)

    for i, (images, target) in enumerate(train_loader):
        images = images.to(device)
        target = target.to(device)
        batch_size = images.size(0)

        pred = model(images)
        loss = criteria(pred, target)
        acc = accuracy(pred, target)

        # Each batch is passed to the meters together with its sample count.
        losses.update(loss.item(), batch_size)
        top1.update(acc[0].item(), batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress every 100 batches.
        if i % 100 == 0:
            progress.pr2int(i)
    return losses, top1

In [6]:
def evaluate(model, device, test_loader):
    """Print how many samples of ``test_loader`` the model classifies correctly.

    Switches ``model`` to eval mode, runs it without gradient tracking over
    every batch, and prints the hit count, the dataset size and the
    percentage. Nothing is returned.
    """
    model.eval()
    n_correct = 0
    with torch.no_grad():
        for batch, labels in test_loader:
            batch = batch.to(device)
            labels = labels.to(device)
            scores = model(batch)
            # Predicted class = index of the highest score in each row.
            predicted = scores.argmax(dim=1, keepdim=True)
            matches = predicted.eq(labels.view_as(predicted))
            n_correct += matches.sum().item()

    n_total = len(test_loader.dataset)
    report = 'Test set: Accuracy: {}/{} ({:.0f}%)'
    print(report.format(n_correct, n_total, 100. * n_correct / n_total))

In [7]:
def main(epochs=5, batch_size=64, data_root="./data", checkpoint_path="best.pt",
         num_workers=10):
    """Train ``Model`` on MNIST, evaluating on the test split after every epoch.

    Args:
        epochs: number of passes over the training set.
        batch_size: batch size for both the training and the test loader.
        data_root: directory holding the MNIST files. They must already be
            present, because the datasets are opened with ``download=False``.
        checkpoint_path: file the final ``state_dict`` is written to. These are
            the weights after the last epoch; no best-epoch selection is done.
        num_workers: worker processes used by the training loader.
    """
    transform = transforms.Compose([transforms.ToTensor()])
    data_train = datasets.MNIST(data_root, transform=transform,
                                train=True,
                                download=False)
    # Shuffle the training data so batches differ between epochs.
    train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    data_test = datasets.MNIST(data_root, transform=transform,
                               train=False,
                               download=False)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=batch_size,
                                              shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Model().to(device)
    criteria = nn.CrossEntropyLoss()
    # A non-zero eps keeps the RMSprop denominator away from zero; with eps=0 a
    # parameter whose gradient is exactly zero would be updated by 0/0 = NaN.
    optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.9, eps=1e-8)

    for _ in range(epochs):
        losses, top1 = train(train_loader, model, optimizer, device, criteria)
        print(losses, top1)
        evaluate(model, device, test_loader)

    torch.save(model.state_dict(), checkpoint_path)
    # Final report for the weights that were just saved.
    evaluate(model, device, test_loader)

In [8]:
# Run the full pipeline: train, evaluate after each epoch, then save the weights.
main()

[  0/938]	Loss 2.3071e+00 (2.3071e+00)	Acc   7.81 (  7.81)
[100/938]	Loss 2.2812e-01 (5.2619e-01)	Acc  92.19 ( 83.66)
[200/938]	Loss 1.2538e-01 (3.9329e-01)	Acc  95.31 ( 88.11)
[300/938]	Loss 1.7269e-01 (3.1878e-01)	Acc  95.31 ( 90.29)
[400/938]	Loss 9.9424e-02 (2.6947e-01)	Acc  95.31 ( 91.74)
[500/938]	Loss 1.2166e-01 (2.4007e-01)	Acc  96.88 ( 92.64)
[600/938]	Loss 9.6993e-02 (2.1766e-01)	Acc  98.44 ( 93.35)
[700/938]	Loss 1.5559e-01 (2.0142e-01)	Acc  96.88 ( 93.83)
[800/938]	Loss 1.4644e-01 (1.8981e-01)	Acc  95.31 ( 94.16)
[900/938]	Loss 1.1408e-01 (1.7926e-01)	Acc  96.88 ( 94.49)
Loss 9.5981e-02 (1.7582e-01) Acc  93.75 ( 94.62)
Test set: Accuracy: 9694/10000 (97%)
[  0/938]	Loss 1.1038e-01 (1.1038e-01)	Acc  98.44 ( 98.44)
[100/938]	Loss 1.6711e-01 (9.2125e-02)	Acc  96.88 ( 97.46)
[200/938]	Loss 9.9987e-02 (9.1163e-02)	Acc  93.75 ( 97.29)
[300/938]	Loss 8.4654e-02 (8.6909e-02)	Acc  98.44 ( 97.33)
[400/938]	Loss 5.3673e-02 (8.2431e-02)	Acc  95.31 ( 97.47)
[500/938]	Loss 1.4192e-01 (8.