In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sympy.physics.units import momentum
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
# Matplotlib text setup for the Chinese labels used in the plots below:
#   - draw sans-serif text with the SimHei font;
#   - disable the Unicode minus glyph, otherwise '-' is rendered as an empty box.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

%matplotlib inline

### 配置全局参数

In [2]:
# Training hyperparameters shared by the cells below.
BATCH_SIZE = 64        # samples per mini-batch
EPOCHS = 30            # full passes over the training set
LEARNING_RATE = 0.01   # step size handed to the optimizer

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### 数据预处理

In [3]:
# Preprocessing applied to every image, in order:
#   1. resize the 28x28 MNIST digit to the 32x32 input the network expects;
#   2. convert it to a float tensor of shape (1, 32, 32) with values in [0, 1];
#   3. standardise with the commonly used MNIST mean / standard deviation.
preprocessing_steps = [
    transforms.Resize(size=(32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
]
transform = transforms.Compose(preprocessing_steps)

### 加载MNIST数据集

In [4]:
# Both splits share the same storage location, download flag and preprocessing;
# only the `train` switch differs.
mnist_common_kwargs = dict(root='./data', download=True, transform=transform)

train_dataset = datasets.MNIST(train=True, **mnist_common_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_common_kwargs)

### 构建数据加载器

In [5]:
# Mini-batch iterators over the two splits.
# Training batches are reshuffled every epoch; the test split keeps a fixed
# order because shuffling it has no effect on the evaluation metrics.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

### LeNet-5 网络结构

In [6]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN mapping 1-channel 32x32 images to 10 class logits.

    Layout (shapes are per sample, for a 32x32 input):
        features:   Conv(1->6, 5x5) -> Sigmoid -> AvgPool(2)   => (6, 28, 28) -> (6, 14, 14)
                    Conv(6->16, 5x5) -> Sigmoid -> AvgPool(2)  => (16, 10, 10) -> (16, 5, 5)
        classifier: Linear(400->120) -> Sigmoid -> Linear(120->84) -> Sigmoid
                    -> Linear(84->10)

    Simplifications kept from the original notebook: the second convolution is
    fully connected across its input channels, and a plain linear layer
    produces the 10 logits (no softmax is applied; pair with a loss that
    expects raw logits).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor (convolution + pooling).
        self.features = nn.Sequential(
            # C1: six 5x5 kernels, stride 1, no padding -> (6, 28, 28)
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0),
            nn.Sigmoid(),  # sigmoid activation (deliberately not ReLU)
            # S2: 2x2 average pooling, stride 2 -> (6, 14, 14)
            nn.AvgPool2d(kernel_size=2, stride=2),

            # C3: sixteen 5x5 kernels, stride 1, no padding -> (16, 10, 10)
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.Sigmoid(),
            # S4: 2x2 average pooling, stride 2 -> (16, 5, 5)
            nn.AvgPool2d(kernel_size=2, stride=2)
        )

        # Classifier head (fully connected layers).
        self.classifier = nn.Sequential(
            # F5: 16*5*5 = 400 -> 120
            nn.Linear(in_features=16*5*5, out_features=120),
            nn.Sigmoid(),
            # F6: 120 -> 84
            nn.Linear(in_features=120, out_features=84),
            nn.Sigmoid(),
            # Output: 84 -> 10 logits, one per digit 0-9
            nn.Linear(in_features=84, out_features=10)
        )

    def forward(self, x):
        """Return class logits of shape (batch, 10) for input of shape (batch, 1, 32, 32).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield 400
                features per sample (raised by the first linear layer).
        """
        x = self.features(x)
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 400)``, this can never fold a wrong spatial size into the
        # batch dimension; a mismatched input fails loudly in the Linear layer.
        x = x.flatten(start_dim=1)
        return self.classifier(x)

### 初始化模型并转移到计算设备上（GPU 可用时为 CUDA，否则为 CPU）

In [7]:
# Build the network, move its parameters to the selected device, and print the
# layer summary.
model = LeNet5()
model = model.to(DEVICE)
print(model)

LeNet5(
  (features): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (classifier): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


### 配置训练组件

In [8]:
# Loss: cross-entropy computed on the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Optimizer: stochastic gradient descent with momentum over all model parameters.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=LEARNING_RATE,
    momentum=0.9,
)

In [None]:
# Per-epoch metric history (consumed by the plotting code further down).
train_loss_history = []
train_acc_history = []
val_loss_history = []
val_acc_history = []


def _run_epoch(loader, training):
    """Make one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and ``DEVICE``.

    Args:
        loader: iterable yielding ``(data, targets)`` batches.
        training: when True, the model is put in train mode and the optimizer
            takes one step per batch; when False, the model is put in eval
            mode and gradient tracking is disabled.

    Returns:
        Tuple ``(mean per-sample loss, fraction of correct predictions)``.
    """
    model.train(training)  # train(True) == train(), train(False) == eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.set_grad_enabled(training):
        for data, targets in loader:
            data, targets = data.to(DEVICE), targets.to(DEVICE)

            outputs = model(data)
            loss = criterion(outputs, targets)

            if training:
                optimizer.zero_grad()  # drop gradients left over from the previous step
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size
            # to keep the epoch average exact when the last batch is smaller.
            loss_sum += loss.item() * data.size(0)
            num_correct += (outputs.argmax(dim=1) == targets).sum().item()
            num_seen += targets.size(0)

    return loss_sum / num_seen, num_correct / num_seen


for epoch in range(EPOCHS):
    # Training pass.
    avg_train_loss, train_acc = _run_epoch(train_loader, training=True)
    train_loss_history.append(avg_train_loss)
    train_acc_history.append(train_acc)

    # Validation pass. NOTE: the test split doubles as the validation set here.
    avg_val_loss, val_acc = _run_epoch(test_loader, training=False)
    val_loss_history.append(avg_val_loss)
    val_acc_history.append(val_acc)

    # Per-epoch progress report.
    print(f"Epoch [{epoch+1}/{EPOCHS}], "
          f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.4f}")

# -------------------------- 6. Model evaluation and visualisation --------------------------
# 1. Final evaluation on the test set
model.eval()
final_correct, final_total = 0, 0
with torch.no_grad():  # inference only, no gradients needed
    for data, targets in test_loader:
        data = data.to(DEVICE)
        targets = targets.to(DEVICE)
        outputs = model(data)
        # Index of the largest logit along the class dimension = predicted digit.
        predicted = outputs.max(dim=1).indices
        final_correct += predicted.eq(targets).sum().item()
        final_total += targets.size(0)

final_accuracy = final_correct / final_total
print(f"\n最终测试集准确率：{final_accuracy:.4f}")
print(f"最终测试集错误率：{(1 - final_accuracy)*100:.2f}%")

# 2. Training-history curves: accuracy (left panel) and loss (right panel)
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))
epoch_axis = range(1, EPOCHS+1)

# One row per panel: (axes, train curve, val curve, train label, val label, title, y label)
panels = [
    (acc_ax, train_acc_history, val_acc_history,
     '训练准确率', '验证准确率', 'LeNet-5 准确率变化', 'Accuracy'),
    (loss_ax, train_loss_history, val_loss_history,
     '训练损失', '验证损失', 'LeNet-5 损失变化', 'Loss'),
]

for ax, train_curve, val_curve, train_label, val_label, title, y_label in panels:
    ax.plot(epoch_axis, train_curve, label=train_label)
    ax.plot(epoch_axis, val_curve, label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 3. Prediction examples: show up to 5 random test images with predicted / true labels
model.eval()
with torch.no_grad():
    # Draw distinct indices (replace=False) so the same image is never shown
    # twice; cap the count so a dataset with fewer than 5 samples still works.
    num_examples = min(5, len(test_dataset))
    random_indices = np.random.choice(len(test_dataset), num_examples, replace=False)
    plt.figure(figsize=(10, 5))

    for i, idx in enumerate(random_indices):
        data, target = test_dataset[idx]
        data = data.unsqueeze(0).to(DEVICE)  # add the batch dimension: (1, 1, 32, 32)
        output = model(data)
        predicted = torch.argmax(output).item()

        # Draw the (normalised) image with its predicted and true label.
        plt.subplot(1, 5, i+1)
        plt.imshow(data.squeeze().cpu().numpy(), cmap='gray')
        plt.title(f"预测：{predicted}\n真实：{target}")
        plt.axis('off')

plt.tight_layout()
plt.show()

Epoch [1/30], Train Loss: 2.3082, Train Acc: 0.1058, Val Loss: 2.3088, Val Acc: 0.0974
Epoch [2/30], Train Loss: 2.3068, Train Acc: 0.1051, Val Loss: 2.3052, Val Acc: 0.1010
Epoch [3/30], Train Loss: 2.3048, Train Acc: 0.1066, Val Loss: 2.3080, Val Acc: 0.1135
Epoch [4/30], Train Loss: 2.3040, Train Acc: 0.1085, Val Loss: 2.3054, Val Acc: 0.0982
Epoch [5/30], Train Loss: 2.3025, Train Acc: 0.1080, Val Loss: 2.3014, Val Acc: 0.1032
Epoch [6/30], Train Loss: 2.2981, Train Acc: 0.1204, Val Loss: 2.2876, Val Acc: 0.1009
Epoch [7/30], Train Loss: 1.9118, Train Acc: 0.3695, Val Loss: 1.0598, Val Acc: 0.6625
Epoch [8/30], Train Loss: 0.8013, Train Acc: 0.7382, Val Loss: 0.6083, Val Acc: 0.8181
Epoch [9/30], Train Loss: 0.5002, Train Acc: 0.8552, Val Loss: 0.3871, Val Acc: 0.8862
Epoch [10/30], Train Loss: 0.3511, Train Acc: 0.8981, Val Loss: 0.2983, Val Acc: 0.9118
Epoch [11/30], Train Loss: 0.2833, Train Acc: 0.9167, Val Loss: 0.2387, Val Acc: 0.9296
Epoch [12/30], Train Loss: 0.2390, Train 