# 下载MNIST手写数字数据集
### datasets.MNIST参数说明
| 参数 | 说明   | 
| ----- | --------- | 
| root | 代表训练集/测试集路径 |
| train  | True代表训练集, False代表测试集 |
| transform | 将图片image转换成Tensor |
| download | 是否下载，第一次使用需要 |

In [12]:
import torch 
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms # pytorch计算机视觉库，含多种数据集
import torchvision
from torch.autograd import Variable
from torch.utils.data import DataLoader
import cv2 # opencv
# Training split (train=True). Images are converted to tensors by the transform.
# download=False: the data is expected to be present under `root` already.
train_dataset = datasets.MNIST(
    root="E:\\pytorch_study\\lab\\data\\",
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)
# Test split (train=False), read from the same root with the same transform.
test_dataset = datasets.MNIST(
    root="E:\\pytorch_study\\lab\\data\\",
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

# Network Architecture

![jupyter](../material_image/LeNet-5.png)
> 最大池化对应图中的Subsampling(下采样)
##### 卷积层1+最大池化层
- 输入为28x28x1的黑白图像，颜色通道为1，如果RGB图像则为3(R,G,B)，所以in_channels=1
- 输出为28x28x6的feature map，通道为6，out_channels=6，feature map单层大小:28*28(32-5+1)
- 卷积核尺寸为5x5，6通道对应6种卷积核，kernel_size=5
- 步长为1，stride=1
- 填充为2，padding=2，在外层补两圈0，图片计算尺寸为32x32
- 结果通过ReLU线性修正单元
- 再经过最大池化MaxPool2d，池化滑动窗口2x2，kernel_size=2，步长为2，stride=2
- 经过最大池化后的结果为14x14x6的三阶张量，进入卷积层2

##### 卷积层2+最大池化层
- 输入为卷积层1最大池化后的输出14x14x6的Tensor（三阶张量）
- 输出为10x10x16的Tensor，out_channels=16，卷积核尺寸为5x5，kernel_size=5
- 默认步长为1，不进行填充
- 结果通过ReLU
- 再经过最大池化MaxPool2d，池化滑动窗口2x2，kernel_size=2，步长为2，stride=2
- 最大池化后结果尺寸为5x5x16，进入全连接层1

##### 全连接层1
- 输入为卷积层2最大池化后的输出5x5x16，展平为400(16x5x5)维的一阶张量后送入全连接层（等价于原始LeNet-5中卷积核种类120、卷积核尺寸5x5的卷积层）
- 输出为120x1的一阶张量
- 经过批归一化(BatchNorm1d)后再通过ReLU

##### 全连接层2
- 输入为全连接层1的输出120x1的一阶张量
- 输出为84x1的一阶张量
- 经过批归一化(BatchNorm1d)后再通过ReLU，然后输入全连接层3

##### 全连接层3
- 输入为全连接层2的输出84x1
- 输出为10x1的一阶张量，10代表十个手写阿拉伯数字类型

In [13]:
class LeNet(nn.Module):
    """LeNet-5 style CNN mapping 1x28x28 images to 10 class scores.

    Two conv + ReLU + 2x2 max-pool stages are followed by three fully
    connected layers. The first two fully connected layers are each followed
    by BatchNorm1d and ReLU; the last one returns raw scores (no softmax).
    """

    def __init__(self):
        super().__init__()
        # Conv stage 1: 1 -> 6 channels, 5x5 kernel; padding=2 keeps 28x28,
        # then 2x2 max-pooling halves it to 14x14.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Conv stage 2: 6 -> 16 channels, 5x5 kernel, no padding (14x14 -> 10x10),
        # then 2x2 max-pooling gives 5x5.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Fully connected layer 1: flattened 16*5*5 features -> 120.
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.BatchNorm1d(120), nn.ReLU())
        # Fully connected layer 2: 120 -> 84.
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.BatchNorm1d(84), nn.ReLU())
        # Fully connected layer 3: 84 -> 10 output scores, one per digit class.
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: input batch; the layer sizes above require shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) with unnormalised class scores.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten (unlike view) also works when the activations are not laid
        # out contiguously, e.g. with channels_last memory format.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
    

# 训练前准备

In [14]:
print("whether to support cuda")
print(torch.cuda.is_available())  # report whether CUDA is usable
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64
LR = 0.001  # learning rate handed to Adam below

# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

# Evaluation does not depend on sample order, so the test loader is left
# unshuffled to keep evaluation runs reproducible.
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

net = LeNet().to(device)
# Cross-entropy loss over the 10 class scores.
criterion = nn.CrossEntropyLoss()
# Adam optimiser over all network parameters.
optimizer = optim.Adam(
    net.parameters(),
    lr=LR,
)

whether to support cuda
True


# 可视化训练集(jupyter notebook环境下不可执行)

In [None]:
# Preview one training batch as an image grid in an OpenCV window.
images, labels = next(iter(train_loader))
img = torchvision.utils.make_grid(images)

# Move the channel axis last, which is the layout cv2.imshow expects.
img = img.numpy().transpose(1, 2, 0)
# No de-normalisation step: the datasets in this file are built with
# transforms.ToTensor() only (no Normalize), so applying `img * 0.5 + 0.5`
# would wash the picture out instead of restoring it.
print(labels)
cv2.imshow('win', img)
key_pressed = cv2.waitKey(0)  # block until a key is pressed
cv2.destroyAllWindows()  # release the preview window afterwards

# Training model

In [15]:
# NOTE(review): the first banner reads "start testing" although this cell
# trains; the text is left unchanged so it matches the recorded output.
print('开始测试')
print('损失函数:')
epoch = 1  # number of passes over the training set
if __name__ == '__main__':
    # Put BatchNorm layers in training mode explicitly; this matters when the
    # cell is re-run after the evaluation cell has called net.eval().
    net.train()
    # A separate loop variable keeps the epoch count above from being overwritten.
    for epoch_idx in range(epoch):
        sum_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            # Follow the model's device instead of assuming CUDA is present.
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()  # back-propagation
            optimizer.step()

            sum_loss += loss.item()
            # Report the mean loss of the last 100 batches.
            if i % 100 == 99:
                print('[%d,%d] loss:%.03f' % (epoch_idx + 1, i + 1, sum_loss / 100))
                sum_loss = 0.0

开始测试
损失函数:
[1,100] loss:0.681
[1,200] loss:0.179
[1,300] loss:0.124
[1,400] loss:0.102
[1,500] loss:0.094
[1,600] loss:0.074
[1,700] loss:0.075
[1,800] loss:0.067
[1,900] loss:0.055


# Test

In [16]:
# Evaluate the trained network on the test set.
net.eval()  # BatchNorm layers switch to their running statistics
correct = 0
total = 0
# Gradients are not needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for images, labels in test_loader:
        # Follow the model's device instead of assuming CUDA is present.
        images, labels = images.to(device), labels.to(device)
        outputs_test = net(images)
        # Index of the highest score among the 10 digit classes.
        _, predicted = torch.max(outputs_test, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
print("correct: ", correct)
# Divide by the number of samples actually evaluated.
print("Test accuracy: {0}".format(correct.item() / total))

correct:  tensor(9828, device='cuda:0')
Test accuracy: 0.9828
