In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# Preprocessing applied to every image: resize to 32x32 (the spatial size at
# which LeNet5's convolution stack collapses to a 1x1 map) and convert to a
# tensor.  The pipeline deliberately has its own name: assigning it to
# `transforms` would rebind the imported torchvision module, so running this
# cell a second time would fail on `transforms.Compose`.
mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# download and create datasets
train_dataset = datasets.MNIST(root='mnist_data', train=True, transform=mnist_transform, download=True)
test_dataset = datasets.MNIST(root='mnist_data', train=False, transform=mnist_transform)

# define the data loaders (only the training data is shuffled)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [3]:
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier for single-channel images.

    The feature extractor (`self.conv`) is three 5x5 convolutions with tanh
    activations, with 2x2 average pooling after the first and second one.
    The classifier head (`self.out`) maps 120 -> 84 -> `n_classes` with a
    tanh in between.

    The head expects exactly 120 features per sample after flattening,
    which is the case when the last convolution produces a 1x1 map
    (e.g. for 1x32x32 inputs: 32 -> 28 -> 14 -> 10 -> 5 -> 1).

    Args:
        n_classes: number of output classes (size of the final layer).
    """

    def __init__(self, n_classes):
        super().__init__()

        # Layers are created in forward order so that parameter
        # initialisation consumes the RNG in a fixed sequence.
        feature_layers = [
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
        ]
        self.conv = nn.Sequential(*feature_layers)

        head_layers = [
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes),
        ]
        self.out = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return `(logits, probabilities)` for a batch `x`.

        The logits are the raw outputs of the head; the probabilities are
        a softmax of those logits over dimension 1.
        """
        features = self.conv(x).flatten(start_dim=1)
        logits = self.out(features)
        return logits, F.softmax(logits, dim=1)

In [4]:
def train(train_loader, model, criterion, optimizer):
    """Run one optimisation pass over `train_loader`.

    The model is switched to training mode, and for every batch the
    gradients are reset, the loss is computed on the first element of the
    model's output tuple, and one optimizer step is taken.

    Each batch loss is weighted by the batch size and the sum is divided by
    `len(train_loader.dataset)` (a per-sample mean if `criterion` itself
    returns a batch mean).

    Returns:
        `(model, optimizer, epoch_loss)` - the same model and optimizer
        objects that were passed in, plus the averaged loss as a float.
    """
    model.train()
    weighted_loss_sum = 0

    for inputs, targets in train_loader:
        optimizer.zero_grad()

        # The model returns a pair; only the first element feeds the loss.
        logits, _ = model(inputs)
        batch_loss = criterion(logits, targets)
        weighted_loss_sum += batch_loss.item() * inputs.size(0)

        batch_loss.backward()
        optimizer.step()

    return model, optimizer, weighted_loss_sum / len(train_loader.dataset)

def test(test_loader, model, criterion):
    model.eval()
    total_loss = 0
    
    for X, y_true in test_loader:
        y, i = model(X) 
        loss = criterion(y, y_true) 
        total_loss += loss.item() * X.size(0)

    epoch_loss = total_loss / len(test_loader.dataset)
        
    return model, epoch_loss


def accuracy(model, data_loader):
    """Return the fraction of samples in `data_loader` classified correctly.

    The model is put into evaluation mode and run without gradient tracking.
    The predicted label of each sample is the index of the largest value
    along dimension 1 of the second element of the model's output tuple.

    Returns:
        A 0-dim float tensor equal to `correct / total`.  If the loader
        yields no samples the accuracy is undefined and a NaN tensor is
        returned (previously this case raised `AttributeError`).
    """
    y_correct = 0
    n = 0
    with torch.no_grad():
        model.eval()
        for X, y_true in data_loader:
            # Second element of the model output is used for the prediction.
            _, y = model(X)
            predicted_labels = y.argmax(dim=1)
            y_correct += (predicted_labels == y_true).sum()
            n += y_true.size(0)

    if n == 0:
        # Nothing was evaluated: y_correct may still be the plain int 0.
        return torch.tensor(float('nan'))

    return y_correct.float() / n

In [5]:
def training(model, criterion, optimizer, train_loader, test_loader, epochs, print_every=1):
    """Train `model` for `epochs` epochs, evaluating after each one.

    Every epoch runs `train` on `train_loader` and then `test` on
    `test_loader` (under `torch.no_grad()`), recording both losses.
    On epochs where `epoch % print_every == print_every - 1`, train and
    test accuracy are computed with `accuracy` and a summary line is
    printed; the epoch number in that line is zero-based.

    Returns:
        `(model, optimizer, (train_losses, test_losses))` where the two
        lists hold one loss value per epoch.
    """
    train_losses, test_losses = [], []

    for epoch in range(epochs):
        # Optimisation pass.
        model, optimizer, train_loss = train(train_loader, model, criterion, optimizer)
        train_losses.append(train_loss)

        # Held-out loss; no gradients are needed for evaluation.
        with torch.no_grad():
            model, test_loss = test(test_loader, model, criterion)
        test_losses.append(test_loss)

        is_report_epoch = epoch % print_every == (print_every - 1)
        if not is_report_epoch:
            continue

        train_acc = accuracy(model, train_loader)
        test_acc = accuracy(model, test_loader)
        report = (f'Epoch: {epoch}\t'
                  f'Train loss: {train_loss:.4f}\t'
                  f'Train accuracy: {100 * train_acc:.2f}\t'
                  f'Test accuracy: {100 * test_acc:.2f}')
        print(report)

    return model, optimizer, (train_losses, test_losses)

In [6]:
# Experiment 1: LeNet-5 with 10 output classes, Adam at learning rate 0.01,
# cross-entropy loss, 15 epochs.
model = LeNet5(n_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# The third return value (per-epoch train/test loss lists) is stored in `i`.
model, optimizer, i = training(
    model,
    criterion,
    optimizer,
    train_loader,
    test_loader,
    epochs=15,
)

Epoch: 0	Train loss: 0.2786	Train accuracy: 93.27	Test accuracy: 93.10
Epoch: 1	Train loss: 0.2160	Train accuracy: 94.77	Test accuracy: 94.87
Epoch: 2	Train loss: 0.2084	Train accuracy: 95.21	Test accuracy: 95.04
Epoch: 3	Train loss: 0.2019	Train accuracy: 94.46	Test accuracy: 94.58
Epoch: 4	Train loss: 0.1933	Train accuracy: 94.75	Test accuracy: 95.00
Epoch: 5	Train loss: 0.1942	Train accuracy: 93.47	Test accuracy: 93.64
Epoch: 6	Train loss: 0.1972	Train accuracy: 94.25	Test accuracy: 94.02
Epoch: 7	Train loss: 0.1943	Train accuracy: 94.43	Test accuracy: 94.45
Epoch: 8	Train loss: 0.1937	Train accuracy: 94.21	Test accuracy: 94.08
Epoch: 9	Train loss: 0.1930	Train accuracy: 94.14	Test accuracy: 94.13
Epoch: 10	Train loss: 0.2061	Train accuracy: 93.61	Test accuracy: 93.69
Epoch: 11	Train loss: 0.1887	Train accuracy: 95.37	Test accuracy: 95.19
Epoch: 12	Train loss: 0.1860	Train accuracy: 94.52	Test accuracy: 94.14
Epoch: 13	Train loss: 0.1915	Train accuracy: 94.59	Test accuracy: 94.68
Ep

In [7]:
# Experiment 2: a fresh LeNet-5 with 10 output classes, Adam at learning
# rate 0.001, 15 epochs.  `criterion` is the loss object created in the
# learning-rate-0.01 cell, so that cell must have been run first.
model2 = LeNet5(n_classes=10)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)

# The third return value (per-epoch train/test loss lists) is stored in `i`.
model2, optimizer2, i = training(
    model2,
    criterion,
    optimizer2,
    train_loader,
    test_loader,
    epochs=15,
)

Epoch: 0	Train loss: 0.2309	Train accuracy: 96.78	Test accuracy: 97.16
Epoch: 1	Train loss: 0.0798	Train accuracy: 98.04	Test accuracy: 97.71
Epoch: 2	Train loss: 0.0577	Train accuracy: 98.82	Test accuracy: 98.16
Epoch: 3	Train loss: 0.0441	Train accuracy: 98.97	Test accuracy: 98.48
Epoch: 4	Train loss: 0.0374	Train accuracy: 99.03	Test accuracy: 98.36
Epoch: 5	Train loss: 0.0305	Train accuracy: 99.13	Test accuracy: 98.32
Epoch: 6	Train loss: 0.0265	Train accuracy: 99.47	Test accuracy: 98.58
Epoch: 7	Train loss: 0.0222	Train accuracy: 99.40	Test accuracy: 98.52
Epoch: 8	Train loss: 0.0183	Train accuracy: 99.28	Test accuracy: 98.45
Epoch: 9	Train loss: 0.0180	Train accuracy: 99.39	Test accuracy: 98.61
Epoch: 10	Train loss: 0.0162	Train accuracy: 99.64	Test accuracy: 98.74
Epoch: 11	Train loss: 0.0149	Train accuracy: 99.52	Test accuracy: 98.65
Epoch: 12	Train loss: 0.0135	Train accuracy: 99.65	Test accuracy: 98.68
Epoch: 13	Train loss: 0.0128	Train accuracy: 99.77	Test accuracy: 98.77
Ep