In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

batch_size = 512
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose ([transforms.ToTensor() ])

trainloader = torch.utils.data.DataLoader (
    datasets.MNIST('data', train=True, download=True, transform=transform), 
    batch_size=batch_size, 
    shuffle=True
)

testloader = torch.utils.data.DataLoader (
    datasets.MNIST('data', train=False, download=True, transform=transform), 
    batch_size=batch_size, 
    shuffle=True
)

In [2]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        # self.conv1=#输入通道1,输出通道6,卷积核5x5
        self.conv1 = nn.Conv2d(1, 6, 5) 
        # self.conv2=#输入通道6,输出通道16,卷积核5x5
        self.conv2 = nn.Conv2d(6, 16, 5)
        # self.fc1 = #全连接层,输入5*5*16,输出120
        self.fc1 = nn.Linear(5 * 5 * 16, 120)
        # self.fc2 = #全连接层,输入120,输出84
        self.fc2 = nn.Linear(120, 84)
        # self.clf = #分类层,输入84,输出10
        self.clf = nn.Linear(84, 10)
    
    def forward(self, x):
        x = F.pad(x, (2, 2, 2, 2), "constant", 0) 
        # conv1
        x = self.conv1(x)
        # 激活函数sigmoid()
        x = F.sigmoid(x)
        # 平均池化层,kernel = 2x2,步长2
        x = F.avg_pool2d(x, kernel_size=2, stride=2)
        
        # conv2
        x = self.conv2(x)
        # 激活函数sigmoid()
        x = F.sigmoid(x)
        # 平均池化层,2x2,步长2
        x = F.avg_pool2d(x, kernel_size=2, stride=2)
        
        # 展平,从第1维开始展平
        x = x.view(x.size(0), -1)
        
        # 全连接层1
        x = self.fc1(x)
        # 激活函数sigmoid()
        x = F.sigmoid(x)
        
        # 全连接层2
        x = self.fc2(x)
        # 激活函数sigmoid()
        x = F.sigmoid(x)
        
        # 分类层
        x = self.clf(x)
        return x

model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (clf): Linear(in_features=84, out_features=10, bias=True)
)


In [None]:

epochs = 10
accs, losses = [], []

for epoch in range(epochs):
    model.train()
    for batch_idx, (x, y) in enumerate(trainloader):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = F.cross_entropy(out, y)
        loss.backward()
        optimizer.step()
        
    model.eval()
    correct = 0
    testloss = 0
    with torch.no_grad():
        for batch_idx, (x, y) in enumerate(testloader):
            x, y = x.to(device), y.to(device)
            out = model(x)
            testloss += F.cross_entropy(out, y).item()
            pred = out.max(dim=1, keepdim=True)[1]
            correct += pred.eq(y.view_as(pred)).sum().item()
            
    acc = correct / len(testloader.dataset)
    testloss = testloss / (batch_idx + 1)
    accs.append(acc)
    losses.append(testloss)
    
    print('epoch: {}, loss: {:.4f}, acc:{:.4f}'.format(epoch, testloss, acc))

epoch: 0, loss: 2.3012, acc:0.1135
epoch: 1, loss: 2.3014, acc:0.1135


In [None]:
## Cell 4: 特征图可视化 (超保守版，严格遵循原始文件代码行 91-97)

# %%
model.eval()
with torch.no_grad():
    
    # 1. 临时加载一个 batch
    x_temp, _ = next(iter(testloader)) 
    x = x_temp.to(device)

    # 2. 严格执行 LeNet-5 的填充
    x = F.pad(x, (2, 2, 2, 2), "constant", 0) 
    
    # 3. 严格执行原始文件中的行 131-137 的特征提取，但这次我们不立即池化
    feature1 = F.sigmoid(model.conv1(x))
    # feature1 = F.avg_pool2d(feature1, kernel_size=2, stride=2) # 原始代码中注释掉了
    feature2 = F.sigmoid(model.conv2(feature1))
    # feature2 = F.avg_pool2d(feature2, kernel_size=2, stride=2) # 原始代码中注释掉了

    # 设置 N=5，严格对应原始文件行 91
    n = 5

    # 4. 仅将所需的前 N 个样本转移到 CPU (原始文件行 92-94)
    # x 此时是 512 个填充后的样本，我们取前 n 个原始样本进行可视化
    # 注意: 为了显示原始图像，我们应该使用未填充的 x_temp
    img = x_temp.detach().cpu().numpy()[:n]
    feature_map1 = feature1.detach().cpu().numpy()[:n]
    feature_map2 = feature2.detach().cpu().numpy()[:n]

# 5. 绘图 (严格对应原始文件行 96-97)
fig, ax = plt.subplots(3, n, figsize=(10, 10))

for i in range(n):
    # 原始图像 (ax[0,i] 之前可能是 ax[0,1])
    # 注意：为了让图片按 i 顺序排列，这里使用 ax[0, i] 而不是 ax[0, 1]
    ax[0, i].imshow(img[i].sum(0), cmap='gray') 
    ax[0, i].axis('off')
    
    # 第一层特征图
    ax[1, i].imshow(feature_map1[i].sum(0), cmap='gray')
    ax[1, i].axis('off')
    
    # 第二层特征图
    ax[2, i].imshow(feature_map2[i].sum(0), cmap='gray')
    ax[2, i].axis('off')

plt.tight_layout()
plt.show()