# LeNet

In [1]:
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
# Report the installed PyTorch build so the recorded outputs can be tied to a version.
print(f'Import PyTorch V{torch.__version__}')

# Prefer the GPU whenever CUDA is usable; otherwise run everything on the CPU.
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Use device {dev}')

Import PyTorch V1.13.1
Use device cuda


In [2]:
print('Create LeNet Model')

def gen_lenet() -> torch.nn.Module:
    """Build the baseline LeNet: sigmoid activations with average pooling.

    The first convolution pads by 2 so a 1x28x28 input keeps its spatial size;
    after the two conv/pool stages the feature map is 16x5x5, which is what
    the first fully connected layer expects.

    Returns:
        A ``torch.nn.Sequential`` of 12 layers ending in a 10-way linear
        layer. No softmax is appended, so the outputs are unnormalised scores.
    """
    nn = torch.nn
    conv_stages = [
        nn.Conv2d(1, 6, kernel_size=5, padding=2),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
    ]
    dense_stages = [
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10),
    ]
    return nn.Sequential(*conv_stages, *dense_stages)

# Build the baseline network, then move it onto the selected device.
m = gen_lenet()
m = m.to(dev)
print(m)


Create LeNet Model
Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
print('Prepare fasion mnist dataset')

import torchvision
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

# Use mini-batches of 64 when CUDA is available and 32 otherwise.
batch_size = 64 if torch.cuda.is_available() else 32

# Training and test splits differ only in the `train` flag; both are downloaded
# into ./data on first use and converted to tensors by ToTensor().
ds_train = torchvision.datasets.FashionMNIST(
    root='data', train=True, download=True, transform=ToTensor())
ds_test = torchvision.datasets.FashionMNIST(
    root='data', train=False, download=True, transform=ToTensor())

# Only the training loader shuffles; the test loader keeps dataset order.
loader_train = DataLoader(ds_train, batch_size=batch_size, shuffle=True)
loader_test = DataLoader(ds_test, batch_size=batch_size)

# Peek at the first test batch only, to show the tensor layout the model will see.
for X, y in loader_test:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Prepare fasion mnist dataset
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [4]:
# Plain SGD over the baseline model's parameters, paired with cross-entropy loss.
opt = torch.optim.SGD(params=m.parameters(), lr=0.9)
loss_fn = torch.nn.CrossEntropyLoss()
print(f'Prepared loss function {loss_fn}')
print(f'Prepared optimizator {opt}')

Prepared loss function CrossEntropyLoss()
Prepared optimizator SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.9
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [5]:
from typing import Callable, Tuple

def train(
        dataloader: DataLoader,
        model: torch.nn.Module,
        loss_fn: Callable,
        optimizer: torch.optim.Optimizer,
        print_cycle: int = 100,
) -> None:
    """Run one training pass (one epoch) over ``dataloader``.

    Each batch is moved to the device of the model's first parameter, then a
    forward pass, loss computation, backward pass and optimizer step are run.
    Every ``print_cycle`` batches (starting with the first) the current batch
    loss and the number of samples processed so far are printed.

    Args:
        dataloader: Source of ``(X, y)`` batches; its dataset must support
            ``len()`` (used only for the progress display).
        model: Network to train; it is switched to training mode.
        loss_fn: Callable invoked as ``loss_fn(pred, y)`` that returns a
            tensor supporting ``backward()``.
        optimizer: Optimizer that updates the model's parameters.
        print_cycle: Number of batches between progress lines; must be >= 10.

    Raises:
        TypeError: If an argument has the wrong type, or if ``print_cycle``
            is below 10 (kept as ``TypeError`` for backward compatibility).
    """
    if not isinstance(dataloader, DataLoader) or \
            not isinstance(model, torch.nn.Module) or \
            not callable(loss_fn) or \
            not isinstance(optimizer, torch.optim.Optimizer) or \
            not isinstance(print_cycle, int) or \
            print_cycle < 10:
        raise TypeError(f'Wrong input types: {type(dataloader)}, '
                        f'{type(model)}, {type(loss_fn)}, {type(optimizer)}, '
                        f'{type(print_cycle)}, {print_cycle}')
    size = len(dataloader.dataset)
    model.train()
    # Batches are sent to wherever the model's first parameter lives.
    dev = next(model.parameters()).device
    # Running total of samples processed. Multiplying the batch index by the
    # current batch length is wrong when the final batch is smaller.
    seen = 0
    for batch, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        X, y = X.to(dev), y.to(dev)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        loss.backward()
        optimizer.step()
        seen += len(X)
        if batch % print_cycle == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

def test(
        dataloader: DataLoader,
        model: torch.nn.Module,
        loss_fn: Callable,
) -> Tuple[float, float]:
    if not isinstance(dataloader, DataLoader) or \
            not isinstance(model, torch.nn.Module) or \
            not isinstance(loss_fn, Callable):
        raise TypeError(f'Wrong input types: {type(dataloader)}, '
                        f'{type(model)}, {type(loss_fn)}')
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0.0, 0.0
    dev = next(iter(model.parameters())).device
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(dev), y.to(dev)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return (test_loss, correct)

In [6]:
def init_weights(m):
    """Xavier-uniform initialise the weight of a Linear or Conv2d layer.

    Intended as the callback for ``Module.apply``. Only modules whose exact
    type is ``torch.nn.Linear`` or ``torch.nn.Conv2d`` are touched; every
    other module (including subclasses of those two) is left unchanged.
    Biases are not modified.
    """
    if type(m) not in (torch.nn.Linear, torch.nn.Conv2d):
        return
    torch.nn.init.xavier_uniform_(m.weight)

# Run init_weights on every submodule of the baseline model. The call is the
# cell's last expression, so the model's repr is shown as the cell output.
m.apply(init_weights)

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [7]:
# Train the baseline model: each epoch is one training pass followed by an
# evaluation on the test loader, with a progress line every 600 batches.
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(dataloader=loader_train, model=m, loss_fn=loss_fn,
          optimizer=opt, print_cycle=600)
    l, correct = test(dataloader=loader_test, model=m, loss_fn=loss_fn)
    # end='\n\n' emits the summary line followed by one blank separator line.
    print(f'Test Error: Accuracy {(100*correct):>0.1f}% Avg loss {l:>8f}', end='\n\n')

Epoch 1
-------------------------------
loss: 2.628754  [   64/60000]
loss: 2.324136  [38464/60000]
Test Error: Accuracy 10.0% Avg loss 2.305992

Epoch 2
-------------------------------
loss: 2.304257  [   64/60000]
loss: 1.835184  [38464/60000]
Test Error: Accuracy 62.8% Avg loss 0.966618

Epoch 3
-------------------------------
loss: 0.959909  [   64/60000]
loss: 0.590586  [38464/60000]
Test Error: Accuracy 76.4% Avg loss 0.599620

Epoch 4
-------------------------------
loss: 0.616673  [   64/60000]
loss: 0.403613  [38464/60000]
Test Error: Accuracy 73.6% Avg loss 0.694210

Epoch 5
-------------------------------
loss: 0.754572  [   64/60000]
loss: 0.378618  [38464/60000]
Test Error: Accuracy 82.3% Avg loss 0.464203

Epoch 6
-------------------------------
loss: 0.457129  [   64/60000]
loss: 0.689101  [38464/60000]
Test Error: Accuracy 83.4% Avg loss 0.451962

Epoch 7
-------------------------------
loss: 0.541685  [   64/60000]
loss: 0.398665  [38464/60000]
Test Error: Accuracy 84.

In [8]:
# Try an improved LeNet: ReLU activations and max pooling replace the sigmoid activations and average pooling
def get_lenet2() -> torch.nn.Module:
    """Build the LeNet variant that uses ReLU activations and max pooling.

    Layer sizes are: two 5x5 convolutions (1->6 with padding 2, then 6->16),
    each followed by ReLU and 2x2 max pooling with stride 2, then
    fully connected layers 400 -> 120 -> 84 -> 10.

    Returns:
        A ``torch.nn.Sequential`` of 12 layers ending in a 10-way linear
        layer. No softmax is appended, so the outputs are unnormalised scores.
    """
    nn = torch.nn
    conv_stages = [
        nn.Conv2d(1, 6, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(2, 2),
        nn.Conv2d(6, 16, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(2, 2),
    ]
    dense_stages = [
        nn.Flatten(),
        nn.Linear(400, 120),
        nn.ReLU(),
        nn.Linear(120, 84),
        nn.ReLU(),
        nn.Linear(84, 10),
    ]
    return nn.Sequential(*conv_stages, *dense_stages)

# Build the ReLU/max-pool variant, move it onto the selected device and show its layers.
m2 = get_lenet2()
m2 = m2.to(dev)
print('Improved LeNet', m2, sep='\n')

Improved LeNet
Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU()
  (11): Linear(in_features=84, out_features=10, bias=True)
)


In [9]:
# Run init_weights on every submodule of the improved model. The call is the
# cell's last expression, so the model's repr is shown as the cell output.
m2.apply(init_weights) # initialize weights

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [10]:
# Train the improved model with its own SGD optimizer (learning rate 0.01),
# reusing the loss function and epoch count set up for the baseline run.
opt2 = torch.optim.SGD(params=m2.parameters(), lr=0.01)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(dataloader=loader_train, model=m2, loss_fn=loss_fn,
          optimizer=opt2, print_cycle=600)
    l, correct = test(dataloader=loader_test, model=m2, loss_fn=loss_fn)
    # end='\n\n' emits the summary line followed by one blank separator line.
    print(f'Test Error: Accuracy {(100*correct):>0.1f}% Avg loss {l:>8f}', end='\n\n')

Epoch 1
-------------------------------
loss: 2.306036  [   64/60000]
loss: 1.101006  [38464/60000]
Test Error: Accuracy 72.4% Avg loss 0.701452

Epoch 2
-------------------------------
loss: 0.775961  [   64/60000]
loss: 0.476271  [38464/60000]
Test Error: Accuracy 75.8% Avg loss 0.633434

Epoch 3
-------------------------------
loss: 0.693133  [   64/60000]
loss: 0.309150  [38464/60000]
Test Error: Accuracy 79.5% Avg loss 0.537873

Epoch 4
-------------------------------
loss: 0.634152  [   64/60000]
loss: 0.453298  [38464/60000]
Test Error: Accuracy 81.0% Avg loss 0.512351

Epoch 5
-------------------------------
loss: 0.599051  [   64/60000]
loss: 0.368264  [38464/60000]
Test Error: Accuracy 83.9% Avg loss 0.454806

Epoch 6
-------------------------------
loss: 0.436514  [   64/60000]
loss: 0.612654  [38464/60000]
Test Error: Accuracy 83.7% Avg loss 0.448276

Epoch 7
-------------------------------
loss: 0.386359  [   64/60000]
loss: 0.270311  [38464/60000]
Test Error: Accuracy 84.