In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# Preprocessing applied to every image: resize to 32x32, then convert to a tensor.
# The pipeline is bound to `mnist_transform` (not `transforms`) so the imported
# `torchvision.transforms` module is not shadowed; shadowing it made this cell
# fail with AttributeError when re-run and broke any later `transforms.<X>` lookup.
mnist_transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

# download and create datasets
train_dataset = datasets.MNIST(root='mnist_data', train=True, transform=mnist_transform, download=True)
test_dataset = datasets.MNIST(root='mnist_data', train=False, transform=mnist_transform)

# define the data loaders (only the training loader is shuffled)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [3]:
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier.

    The network is split into two stages:

    * ``self.conv`` - three 5x5 convolutions with tanh activations, the first
      two each followed by 2x2 average pooling, and a dropout layer (p=0.5)
      at the end.
    * ``self.out`` - a 120 -> 84 -> ``n_classes`` fully connected head.

    The head expects 120 flattened features, which is what the convolution
    stack produces for a single-channel 32x32 input (32 -> 28 -> 14 -> 10 -> 5 -> 1).

    Args:
        n_classes: number of output units of the final linear layer.
    """

    def __init__(self, n_classes):
        super().__init__()

        # Convolutional feature extractor (layer order fixes the Sequential indices).
        feature_layers = [
            nn.Conv2d(1, 6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(16, 120, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.Dropout(0.5),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # Fully connected classification head.
        head_layers = [
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, n_classes),
        ]
        self.out = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run a forward pass.

        Returns:
            A ``(logits, probabilities)`` tuple, where ``probabilities`` is the
            softmax of ``logits`` over dimension 1.
        """
        features = self.conv(x).flatten(start_dim=1)
        logits = self.out(features)
        probabilities = F.softmax(logits, dim=1)
        return logits, probabilities

In [4]:
def train(train_loader, model, criterion, optimizer):
    """Run one optimisation pass over ``train_loader``.

    The model is put in training mode and, for every batch, gradients are
    reset, the loss is computed on the first element of the model's output,
    back-propagated, and the optimizer takes a step.

    Args:
        train_loader: iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute whose length is the number of samples.
        model: callable module returning a 2-tuple; only the first element
            is passed to ``criterion``.
        criterion: loss function called as ``criterion(output, targets)``.
        optimizer: optimizer updating the model's parameters.

    Returns:
        ``(model, optimizer, epoch_loss)`` where ``epoch_loss`` is the sum of
        per-batch losses weighted by batch size, divided by the dataset length.
    """
    model.train()
    running_loss = 0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        logits, _ = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch is averaged correctly.
        running_loss += batch_loss.item() * inputs.size(0)

    sample_count = len(train_loader.dataset)
    return model, optimizer, running_loss / sample_count

def test(test_loader, model, criterion):
    model.eval()
    total_loss = 0
    
    for X, y_true in test_loader:
        y, i = model(X) 
        loss = criterion(y, y_true) 
        total_loss += loss.item() * X.size(0)

    epoch_loss = total_loss / len(test_loader.dataset)
        
    return model, epoch_loss


def accuracy(model, data_loader):
    """Return the fraction of samples in ``data_loader`` classified correctly.

    The model is switched to evaluation mode and run under ``torch.no_grad()``.
    The predicted label of a sample is the index of the largest entry in the
    second element of the model's output, taken along dimension 1.

    Args:
        model: callable module returning a 2-tuple; the second element is
            used to pick the predicted label.
        data_loader: iterable of ``(inputs, targets)`` batches.

    Returns:
        A 0-dim float tensor: number of correct predictions / number of samples.

    Raises:
        ValueError: if ``data_loader`` yields no samples (accuracy is undefined;
            previously this surfaced as an unrelated ``AttributeError``).
    """
    y_correct = 0
    n = 0
    model.eval()
    with torch.no_grad():
        for X, y_true in data_loader:
            _, y = model(X)
            predicted_labels = y.argmax(dim=1)
            # Not in-place: the first iteration turns the int 0 into a tensor.
            y_correct = y_correct + (predicted_labels == y_true).sum()
            n += y_true.size(0)

    if n == 0:
        raise ValueError('accuracy() received a data_loader with no samples')

    return y_correct.float() / n

In [5]:
def training(model, criterion, optimizer, train_loader, test_loader, epochs, print_every=1):
    """Train ``model`` for ``epochs`` epochs, tracking train and test loss.

    Each epoch runs ``train`` on ``train_loader`` and then ``test`` on
    ``test_loader`` with autograd disabled. On every ``print_every``-th epoch
    the train and test accuracy are computed with ``accuracy`` and a summary
    line is printed.

    Args:
        model: network to optimise.
        criterion: loss function forwarded to ``train`` and ``test``.
        optimizer: optimizer forwarded to ``train``.
        train_loader: batches used for optimisation and train accuracy.
        test_loader: batches used for the test loss and test accuracy.
        epochs: number of passes over ``train_loader``.
        print_every: report interval in epochs (default 1: every epoch).

    Returns:
        ``(model, optimizer, (train_losses, test_losses))`` where each list
        holds one loss value per epoch.
    """
    train_history, test_history = [], []

    for epoch in range(epochs):
        # Optimisation pass.
        model, optimizer, epoch_train_loss = train(train_loader, model, criterion, optimizer)
        train_history.append(epoch_train_loss)

        # Evaluation pass, without gradient tracking.
        with torch.no_grad():
            model, epoch_test_loss = test(test_loader, model, criterion)
            test_history.append(epoch_test_loss)

        # Report only on the last epoch of each `print_every` window.
        is_report_epoch = (print_every - 1) == epoch % print_every
        if not is_report_epoch:
            continue

        train_acc = accuracy(model, train_loader)
        test_acc = accuracy(model, test_loader)
        report = (f'Epoch: {epoch}\t'
                  f'Train loss: {epoch_train_loss:.4f}\t'
                  f'Train accuracy: {100 * train_acc:.2f}\t'
                  f'Test accuracy: {100 * test_acc:.2f}')
        print(report)

    return model, optimizer, (train_history, test_history)

In [6]:
# Experiment 1: 10-class LeNet-5 trained with Adam at learning rate 0.01 for 15 epochs.
model = LeNet5(n_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# `i` receives the (train_losses, test_losses) pair returned by `training`.
model, optimizer, i = training(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=15,
)

Epoch: 0	Train loss: 0.4228	Train accuracy: 93.64	Test accuracy: 94.04
Epoch: 1	Train loss: 0.3543	Train accuracy: 93.94	Test accuracy: 93.93
Epoch: 2	Train loss: 0.3379	Train accuracy: 93.89	Test accuracy: 94.08
Epoch: 3	Train loss: 0.3332	Train accuracy: 93.84	Test accuracy: 94.03
Epoch: 4	Train loss: 0.3347	Train accuracy: 94.28	Test accuracy: 94.62
Epoch: 5	Train loss: 0.3312	Train accuracy: 94.05	Test accuracy: 94.32
Epoch: 6	Train loss: 0.3283	Train accuracy: 93.76	Test accuracy: 93.88
Epoch: 7	Train loss: 0.3176	Train accuracy: 94.73	Test accuracy: 94.72
Epoch: 8	Train loss: 0.2994	Train accuracy: 94.78	Test accuracy: 95.26
Epoch: 9	Train loss: 0.3036	Train accuracy: 95.04	Test accuracy: 95.35
Epoch: 10	Train loss: 0.3085	Train accuracy: 94.45	Test accuracy: 94.58
Epoch: 11	Train loss: 0.3061	Train accuracy: 94.26	Test accuracy: 94.58
Epoch: 12	Train loss: 0.3112	Train accuracy: 94.08	Test accuracy: 94.40
Epoch: 13	Train loss: 0.3314	Train accuracy: 94.05	Test accuracy: 94.67
Ep

In [7]:
# Experiment 2: same architecture, Adam at learning rate 0.001 for 15 epochs.
# Reuses the `criterion` created for the first experiment.
model2 = LeNet5(n_classes=10)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)

# `i` receives the (train_losses, test_losses) pair returned by `training`.
model2, optimizer2, i = training(
    model=model2,
    criterion=criterion,
    optimizer=optimizer2,
    train_loader=train_loader,
    test_loader=test_loader,
    epochs=15,
)

Epoch: 0	Train loss: 0.3318	Train accuracy: 96.17	Test accuracy: 96.41
Epoch: 1	Train loss: 0.1415	Train accuracy: 97.65	Test accuracy: 97.59
Epoch: 2	Train loss: 0.1043	Train accuracy: 98.06	Test accuracy: 98.00
Epoch: 3	Train loss: 0.0895	Train accuracy: 98.59	Test accuracy: 98.44
Epoch: 4	Train loss: 0.0791	Train accuracy: 98.65	Test accuracy: 98.58
Epoch: 5	Train loss: 0.0737	Train accuracy: 98.84	Test accuracy: 98.61
Epoch: 6	Train loss: 0.0647	Train accuracy: 98.94	Test accuracy: 98.60
Epoch: 7	Train loss: 0.0624	Train accuracy: 98.87	Test accuracy: 98.55
Epoch: 8	Train loss: 0.0587	Train accuracy: 99.19	Test accuracy: 98.80
Epoch: 9	Train loss: 0.0554	Train accuracy: 99.08	Test accuracy: 98.77
Epoch: 10	Train loss: 0.0525	Train accuracy: 99.13	Test accuracy: 98.91
Epoch: 11	Train loss: 0.0507	Train accuracy: 99.14	Test accuracy: 98.73
Epoch: 12	Train loss: 0.0479	Train accuracy: 99.37	Test accuracy: 99.05
Epoch: 13	Train loss: 0.0468	Train accuracy: 99.33	Test accuracy: 98.90
Ep