In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchsummary import summary

import numpy as np

In [2]:
epochs        = 10
batch_size    = 32                  # number of samples per batch
imsize        = 28
num_classes   = 10                  # The number of output classes. In this case, from 0 to 9
learning_rate = torch.tensor(1e-2)  # The speed of convergence
momentum      = torch.tensor(9e-1)  # momentum for optimizer
decay         = torch.tensor(5e-4)  # weight decay for regularization

In [3]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
print("Device: ", device)

Device:  cpu


In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])

train_data = datasets.MNIST('data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_data = datasets.MNIST('data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [5]:
class LeNet(nn.Module):
    def __init__(self, in_chan, out_chan, imsize, kernel_size=5, drop1=0, drop2=0.5):
        super(LeNet, self).__init__()
        
        z = 5
        
        self.conv1  = nn.Conv2d(in_chan, 6, kernel_size, padding=2)
        self.conv2  = nn.Conv2d(6, 16, kernel_size)
        self.fc1    = nn.Linear(16*z*z, 120)
        self.fc2    = nn.Linear(120, 84)
        self.fc3    = nn.Linear(84, out_chan)
        self.drop2d = nn.Dropout2d(p=drop1)
        self.drop   = nn.Dropout(p=drop2)
        
        self.init_weights()
    
    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def enable_dropout(self):
        for m in self.modules():
            if isinstance(m, nn.Dropout):
                m.train()

    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = self.drop2d(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = self.drop2d(x)
        x = F.max_pool2d(x, 2)
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = F.relu(self.fc2(x))
        x = self.drop(x)
        x = self.fc3(x)
        
        return x

In [6]:
model = LeNet(in_chan=1, out_chan=num_classes, imsize=imsize, kernel_size=5, drop1=0, drop2=0.5).to(device=device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=decay)

In [7]:
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
         Dropout2d-2            [-1, 6, 28, 28]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         Dropout2d-4           [-1, 16, 10, 10]               0
            Linear-5                  [-1, 120]          48,120
           Dropout-6                  [-1, 120]               0
            Linear-7                   [-1, 84]          10,164
           Dropout-8                   [-1, 84]               0
            Linear-9                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.10
Params size (MB): 0.24
Estimated Total Size (MB): 0.34
---------------------------------------------

In [None]:
start = time.time()

epoch_trainaccs, epoch_testaccs = [], []
for epoch in range(epochs):  # loop over the dataset multiple times

    model.train()
    train_accs=[]; acc = 0
    for batch, (x_train, y_train) in enumerate(train_loader):
        
        x_train = x_train.to(device=device)
        y_train = y_train.to(device=device)
        
        model.zero_grad()
        
        pred = model(x_train)
        loss = F.cross_entropy(pred, y_train)
        loss.backward()
        optimizer.step()
        
        acc = (pred.argmax(dim=-1) == y_train).to(torch.float32).mean()
        train_accs.append(acc.item())

    with torch.no_grad():
        model.eval()
        test_losses, test_accs = [], []; acc = 0
        for i, (x_test, y_test) in enumerate(test_loader):
            
            x_test = x_test.to(device=device)
            y_test = y_test.to(device=device)
            
            test_pred = model(x_test)
            loss = F.cross_entropy(test_pred, y_test)
            
            acc = (test_pred.argmax(dim=-1) == y_test).to(torch.float32).mean()
            test_losses.append(loss.item())
            test_accs.append(acc.mean().item())

    if verbose:
        print('Epoch: {}, Loss: {}, Accuracy: {}'.format(epoch, np.mean(test_losses), np.mean(test_accs)))
        
    epoch_trainaccs.append(np.mean(train_accs))
    epoch_testaccs.append(np.mean(test_accs))
    
if use_cuda: torch.cuda.synchronize()
end = time.time()
print("Run time [s]: ",end-start)

Epoch: 0, Loss: 0.08791932333971168, Accuracy: 0.9719448881789138
Epoch: 1, Loss: 0.05949718490429483, Accuracy: 0.9838258785942492
Epoch: 2, Loss: 0.058786239978726604, Accuracy: 0.983526357827476
Epoch: 3, Loss: 0.04681262281884263, Accuracy: 0.9863218849840255
Epoch: 4, Loss: 0.04564482418936677, Accuracy: 0.9868210862619808
Epoch: 5, Loss: 0.03939377796039701, Accuracy: 0.987120607028754


In [None]:
print('Finished Training', iter)
print("Final test error: ",100.*(1 - epoch_testaccs[-1]))

np.savez("./mnist_lenet_none.npz",epoch_testaccs)