## PyTorch实现手写数字识别器
> mnist是一个开源的手写数字数据集，借此实现一个简单的手写数字识别的网络

相关参考

* https://www.jb51.net/article/208404.htm
* https://www.jb51.net/article/141074.htm
* https://www.jb51.net/article/211872.htm

### 数据的处理
> 使用pytorch自带的包进行数据的预处理

直接将图片标准化到了-1到1的范围，标准化的原因就是因为如果某个数在数据中很大很大，就导致其权重较大，从而影响到其他数据

本身我们的数据都是平等的，所以标准化后将数据分布到-1到1的范围，使得所有数据都不会有太大的权重导致网络出现巨大的波动

train_data_loader 现在是一个可迭代的对象，可以直接使用 for 循环进行遍历；它像 yield 生成器一样按批次逐步返回数据，而不是一次性把全部数据读入内存，从而节约内存


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

# Preprocessing pipeline: ToTensor converts an image to a float tensor, then
# Normalize applies (x - 0.5) / 0.5 so the values land in the -1..1 range.
transform = transforms.Compose([
    transforms.ToTensor(),
    # MNIST images have a single channel, so mean and std are one-element
    # tuples. Note that `(0.5)` is just the float 0.5; the trailing comma is
    # what makes it a real per-channel sequence.
    transforms.Normalize((0.5,), (0.5,)),
])
# Raw archive location, kept for reference: www.di.ens.fr/~lelarge/MNIST.tar.gz
train_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)

# Notes
# transforms.Normalize(mean, std) standardizes the data.
# mean: the average, i.e. the sum of the elements divided by their count
# std: the standard deviation, i.e. the square root of the variance
#      (variance = mean of the squared differences from the mean)
# norm_data = (tensor - mean) / std

### 数据的查看

In [None]:
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Tensor whose dimensions are ordered (channels, height, width)
            and whose values were normalized with mean 0.5 and std 0.5.
    """
    # Undo the (x - 0.5) / 0.5 normalization: x = y / 2 + 0.5.
    restored = img / 2 + 0.5
    # matplotlib wants the channel axis last, so reorder CHW -> HWC.
    pixels = restored.numpy().transpose(1, 2, 0)
    plt.imshow(pixels)
    plt.show()
# torchvision.utils.make_grid tiles the images of one batch into a single image.
# The built-in next() is used because DataLoader iterators in current PyTorch
# releases no longer provide a .next() method.
imshow(torchvision.utils.make_grid(next(iter(train_data_loader))[0]))

### 构建网络
1. 卷积层使用 torch.nn.Conv2d
2. 激活层使用 torch.nn.ReLU
3. 池化层使用 torch.nn.MaxPool2d
4. 全连接层使用 torch.nn.Linear

In [None]:
from torch import nn
import torch.nn.functional as F

In [None]:
# 示例模型 一
class Net(nn.Module):
    """Small CNN that maps 1x28x28 images to 10 class scores.

    Layout: conv 5x5 -> ReLU -> max-pool -> conv 5x5 -> ReLU -> max-pool
    -> flatten -> linear(1024) -> ReLU -> linear(10).
    """

    def __init__(self):
        super().__init__()
        base_channels = 28
        self.conv1 = nn.Conv2d(1, base_channels, kernel_size=5)
        # Pooling has no learnable parameters, so one instance serves both stages.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(base_channels, base_channels * 2, kernel_size=5)
        # After two conv+pool stages a 28x28 input is reduced to 4x4 per channel.
        self.fc1 = nn.Linear(base_channels * 2 * 4 * 4, 1024)
        # Final layer emits one score per digit class.
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, inputs):
        """Return class scores of shape (batch, 10) for a batch of images.

        The shape comments below are for a (32, 1, 28, 28) input batch.
        """
        batch_size = inputs.size(0)
        stage1 = self.pool(F.relu(self.conv1(inputs)))  # (32, 28, 12, 12)
        stage2 = self.pool(F.relu(self.conv2(stage1)))  # (32, 56, 4, 4)
        flat = stage2.view(batch_size, -1)              # (32, 896)
        hidden = F.relu(self.fc1(flat))                 # (32, 1024)
        return self.fc2(hidden)                         # (32, 10)

In [None]:
# 示例模型 二
class LeNet(nn.Module):
    """LeNet-style CNN with batch-normalized fully connected layers.

    Takes 1x28x28 images and returns 10 scores, one per digit 0-9.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU(),
        )
        # The last Linear must output 10 values because the labels are 0-9.
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Return class scores of shape (batch, 10) for a batch of images.

        The shape comments below are for a (32, 1, 28, 28) input batch.
        """
        features = self.conv1(x)                      # (32, 6, 15, 15)
        features = self.conv2(features)               # (32, 16, 5, 5)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)    # (32, 400)
        hidden = self.fc1(flat)                       # (32, 120)
        return self.fc2(hidden)                       # (32, 10)

![](https://img.jbzj.com/file_images/article/202103/2021032611485914.gif)
* in_channels: 输入通道数，彩色图片有 3 个通道，灰度（黑白）图片有 1 个通道
* out_channels: 输出通道数，即卷积核的个数
* kernel_size: 卷积核的大小
* stride: 卷积的步长
* padding: 在输入四周填充的大小

输出的size计算公式:
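$$h_{out} = \left\lfloor \frac{h_{in} - \text{kernel\_size} + 2 \times \text{padding}}{\text{stride}} \right\rfloor + 1$$

$$w_{out} = \left\lfloor \frac{w_{in} - \text{kernel\_size} + 2 \times \text{padding}}{\text{stride}} \right\rfloor + 1$$

除不尽时向下取整。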

MaxPool2d: 没有需要学习的参数，只是在每个窗口内取最大值

### 实例化网络优化器，并且使用GPU进行训练

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Model to train; replace LeNet() with Net() to try the first example model.
net = LeNet()
net.to(device)
print(net)

# Print every parameter together with its default initial values.
for param_name, param_value in net.named_parameters():
    print(param_name, param_value)

# Example print(net) output when Net() is selected instead of LeNet():
# Net(
#  (conv1): Conv2d(1, 28, kernel_size=(5, 5), stride=(1, 1))
#  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#  (conv2): Conv2d(28, 56, kernel_size=(5, 5), stride=(1, 1))
#  (fc1): Linear(in_features=896, out_features=1024, bias=True)
#  (fc2): Linear(in_features=1024, out_features=10, bias=True)
# )

### 训练

* 训练模型时，调用 model.train()：Batch Normalization 使用当前批次的统计量并更新滑动均值和方差，Dropout 会随机丢弃神经元
* 测试（评估）模型时，调用 model.eval()：Batch Normalization 改用训练中累积的滑动均值和方差，Dropout 被关闭

In [None]:
criterion = nn.CrossEntropyLoss()  # cross-entropy loss computed on the raw class scores
opt = torch.optim.Adam(params=net.parameters(), lr=0.001)  # Adam adaptive optimizer
for epoch in range(10):
    # Put BatchNorm/Dropout layers in training mode explicitly, so re-running
    # this cell after an evaluation cell called net.eval() still trains correctly.
    net.train()
    loss_sum = 0.0  # sum of per-sample losses seen in this epoch
    correct = 0     # number of correctly classified samples in this epoch
    seen = 0        # number of samples processed in this epoch
    for images, labels in train_data_loader:
        images = images.to(device)
        labels = labels.to(device)
        logits = net(images)
        loss = criterion(logits, labels)
        opt.zero_grad()
        loss.backward()
        opt.step()
        batch_size = labels.size(0)
        # criterion returns the batch mean, so weight it by the batch size
        # before accumulating; batches may differ in size (e.g. the last one).
        loss_sum += loss.item() * batch_size
        # torch.argmax returns the index of the largest score, i.e. the predicted class.
        correct += (torch.argmax(logits, dim=1) == labels).sum().item()
        seen += batch_size
    # Report epoch-wide accuracy and mean loss instead of the values of the
    # final mini-batch only; max(..., 1) avoids dividing by zero on an empty loader.
    print(epoch, correct / max(seen, 1), loss_sum / max(seen, 1))


In [None]:
# Prediction: measure accuracy on one shuffled batch of the test split.

test_set = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True, num_workers=2)
# The built-in next() is used because DataLoader iterators in current PyTorch
# releases no longer provide a .next() method.
images, labels = next(iter(test_loader))
images = images.to(device)
labels = labels.to(device)
# Switch to evaluation mode so BatchNorm uses its running statistics instead of
# this batch's statistics (and does not update them during inference).
net.eval()
with torch.no_grad():
    pre_label = net(images)
    pre_label = torch.argmax(pre_label, dim=1)  # index of the largest score = predicted digit
    acc = (pre_label == labels).float().mean()  # fraction of correct predictions in the batch
    print(acc)