# Coding: LeNet for MNIST

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Seed PyTorch's global random number generator so that runs are repeatable.
# This is executed at import time, before any model or data loader is created.
torch.manual_seed(42)

# Define the LeNet architecture
class LeNet(nn.Module):
    """LeNet-style convolutional network for 28x28 single-channel images.

    The network stacks two ``conv -> ReLU -> 2x2 max-pool`` stages followed
    by three fully connected layers and returns raw (un-normalised) scores
    for 10 classes.

    Input:  tensor of shape ``(batch, 1, 28, 28)``.
    Output: tensor of shape ``(batch, 10)``.
    """

    def __init__(self):
        super().__init__()
        self.conv0 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv1 = nn.Conv2d(6, 16, kernel_size=5)
        # After two conv(5x5) + pool(2x2) stages a 28x28 image is 16 x 4 x 4.
        self.fc0 = nn.Linear(16 * 4 * 4, 120)
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        # Pooling must be 2-D: the conv layers emit (batch, C, H, W) tensors,
        # which MaxPool1d rejects.
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: tensor of shape ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding one score per class.
        """
        # Single-Channel 1-6: First conv block
        # dimension: 28x28x1 -> 24x24x6
        x = self.relu(self.conv0(x))
        # dimension: 24x24x6 -> 12x12x6
        x = self.maxpool(x)

        # Multi-Channel 6-16: Second conv block
        # dimension: 12x12x6 -> 8x8x16
        x = self.relu(self.conv1(x))
        # dimension: 8x8x16 -> 4x4x16
        x = self.maxpool(x)

        # MLP: Flatten and fully connected layers.
        # Keep the batch dimension (dim 0) and collapse channels/height/width
        # so every sample becomes one 256-element row.
        x = x.view(x.size(0), -1)
        # dimension: 16x4x4 -> 120
        x = self.relu(self.fc0(x))
        # dimension: 120 -> 84
        x = self.relu(self.fc1(x))
        # dimension: 84 -> 10
        x = self.fc2(x)
        return x

# Data loading and preprocessing
def load_data(batch_size=64):
    """Build the MNIST training and test data loaders.

    Both splits are stored under ``./data`` (downloaded when missing) and
    share the same preprocessing: conversion to a tensor followed by
    normalisation with mean 0.1307 and standard deviation 0.3081.

    Args:
        batch_size: number of samples per batch for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    def _mnist_split(is_train):
        # One MNIST split with the shared root directory and preprocessing.
        return torchvision.datasets.MNIST(
            root='./data',
            train=is_train,
            download=True,
            transform=preprocess,
        )

    # Create both datasets first, then wrap them in loaders.
    training_set = _mnist_split(True)
    held_out_set = _mnist_split(False)

    train_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(held_out_set, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Training
def train(model, train_loader, criterion, optimizer, device):
    """Run one training pass over ``train_loader`` and update ``model``.

    Args:
        model: module to optimise; switched to training mode here.
        train_loader: sized iterable yielding ``(images, labels)`` batches.
        criterion: callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: optimizer that owns the model's parameters.
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` where the loss is averaged
        over the number of batches and the accuracy over all samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # The predicted class is the index of the largest score in each row.
        predictions = logits.max(1)[1]
        n_seen += batch_targets.size(0)
        n_correct += (predictions == batch_targets).sum().item()

    accuracy_pct = 100. * n_correct / n_seen
    mean_loss = loss_sum / len(train_loader)
    return mean_loss, accuracy_pct

# Evaluation
def evaluate(model, test_loader, criterion, device):
    """Measure loss and accuracy of ``model`` on ``test_loader``.

    The model is put in evaluation mode and gradient tracking is disabled
    for the whole pass; no parameters are updated.

    Args:
        model: module to evaluate.
        test_loader: sized iterable yielding ``(images, labels)`` batches.
        criterion: callable mapping ``(outputs, labels)`` to a scalar loss.
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` where the loss is averaged
        over the number of batches and the accuracy over all samples.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            loss_sum += criterion(logits, targets).item()

            # The predicted class is the index of the largest score per row.
            guesses = logits.max(1)[1]
            seen += targets.size(0)
            hits += (guesses == targets).sum().item()

    accuracy_pct = 100. * hits / seen
    return loss_sum / len(test_loader), accuracy_pct

def main():
    """Train LeNet on MNIST for a fixed number of epochs and save the weights.

    Uses CUDA when available and the CPU otherwise, prints per-epoch
    training and test metrics, and writes the final ``state_dict`` to
    ``lenet_mnist.pth`` in the current working directory.
    """
    # Pick the compute device.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    print(f'Using device: {device}')

    # Hyperparameters.
    batch_size, learning_rate, num_epochs = 64, 0.001, 10

    # Data first, then the model, loss function and optimizer.
    train_loader, test_loader = load_data(batch_size)
    model = LeNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    print('Starting training...')
    epoch = 0
    while epoch < num_epochs:
        # One optimisation pass followed by a held-out evaluation.
        train_loss, train_acc = train(
            model, train_loader, criterion, optimizer, device
        )
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)

        print(f'Epoch [{epoch+1}/{num_epochs}]:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
        print('-' * 50)
        epoch += 1

    # Persist only the learned parameters, not the whole module object.
    torch.save(model.state_dict(), 'lenet_mnist.pth')
    print('Training completed and model saved!')

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Using device: cpu


100%|██████████| 9.91M/9.91M [00:00<00:00, 14.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 394kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.69MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 11.1MB/s]


Starting training...
Epoch [1/10]:
Train Loss: 0.2402, Train Acc: 92.56%
Test Loss: 0.0785, Test Acc: 97.49%
--------------------------------------------------
Epoch [2/10]:
Train Loss: 0.0705, Train Acc: 97.72%
Test Loss: 0.0471, Test Acc: 98.44%
--------------------------------------------------
Epoch [3/10]:
Train Loss: 0.0510, Train Acc: 98.40%
Test Loss: 0.0490, Test Acc: 98.40%
--------------------------------------------------
Epoch [4/10]:
Train Loss: 0.0383, Train Acc: 98.81%
Test Loss: 0.0384, Test Acc: 98.78%
--------------------------------------------------
Epoch [5/10]:
Train Loss: 0.0333, Train Acc: 98.95%
Test Loss: 0.0388, Test Acc: 98.80%
--------------------------------------------------
Epoch [6/10]:
Train Loss: 0.0289, Train Acc: 99.06%
Test Loss: 0.0355, Test Acc: 98.85%
--------------------------------------------------
Epoch [7/10]:
Train Loss: 0.0237, Train Acc: 99.26%
Test Loss: 0.0306, Test Acc: 99.08%
--------------------------------------------------
Epoch 