5.5 卷积神经网络 LeNet
    -与多层感知机对比（图像分类任务）。在多层感知机中，每张图像的高和宽是28像素，将图像中的像素逐行展开，得到长度为784的向量，输入到全连接层中。
        缺点：
        *图像在同一列邻近的像素在这个向量中可能相距较远，相关性不能被模型很好的学习。
        *对于大尺寸图像，系统开销很大。
    -卷积层
        *卷积层保留了图像的形状，使图像像素在高和宽两个方向上的相关性能被有效识别；
        *卷积层通过滑动窗口使同一卷积核与不同位置的输入重复计算。

5.5.1 LeNet模型

In [3]:
import time
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim

import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

class LeNet(nn.Module):
    def __init__(self) -> None:
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(16*4*4, 120),
            nn.Sigmoid(),   #值域变为0-1
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )
    
    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output

net = LeNet()
net

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)

5.5.2 获取数据和训练模型

In [8]:
import torchvision
import torchvision.transforms as transforms
batch_size = 256
train_data = torchvision.datasets.FashionMNIST(root='../Datasets/', train=False, transform=transforms.ToTensor())
test_data = torchvision.datasets.FashionMNIST(root='../Datasets/', train=False, transform=transforms.ToTensor())
train_iter = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_data, batch_size, shuffle=True)

def evaluate_acccuracy(data_iter, net, device=None):
    if device is None and isinstance(net, nn.Module):
        device = list(net.parameters())[0].device
    
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, nn.Module):
                net.eval()
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().item()
                net.train()
            else:   # 自定义的模型
                if 'is_training' in net.__code__.co_varnames:
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().cpu().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        
            n += y.shape[0]
    return acc_sum / n

def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print('training on', device)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_acccuracy(test_iter, net, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on cuda
epoch 1, loss 2.3055, train acc 0.098, test acc 0.100, time 12.0 sec
epoch 2, loss 2.2791, train acc 0.180, test acc 0.342, time 2.2 sec
epoch 3, loss 1.9809, train acc 0.332, test acc 0.463, time 2.3 sec
epoch 4, loss 1.5370, train acc 0.476, test acc 0.539, time 2.2 sec
epoch 5, loss 1.2828, train acc 0.553, test acc 0.562, time 2.2 sec
