# Mnist分类任务：

- 网络基本构建与训练方法，常用函数解析

- torch.nn.functional模块

- nn.Module模块


# 读取Mnist数据集
- 会自动进行下载

In [None]:
%matplotlib inline

In [None]:
from pathlib import Path
import requests

# Local directory that holds the dataset archive.
DATA_PATH = Path("data")
PATH = DATA_PATH / "分类任务_mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "http://deeplearning.net/data/mnist/"
FILENAME = "分类任务_mnist.pkl.gz"

# Download the archive only when there is no local copy yet.
# NOTE(review): FILENAME is used both as the local file name and as the name
# appended to URL -- confirm the server really hosts a file with this name.
if not (PATH / FILENAME).exists():
    # A timeout keeps the cell from hanging forever on an unreachable host.
    response = requests.get(URL + FILENAME, timeout=30)
    # Fail loudly on an HTTP error instead of saving an error page as the
    # dataset, which would only surface later as a confusing gzip failure.
    response.raise_for_status()
    content = response.content
    # write_bytes opens, writes and closes the file, so no handle is leaked.
    (PATH / FILENAME).write_bytes(content)

In [None]:
import pickle
import gzip

# Unpickle the gzip-compressed archive: it holds three splits, of which the
# first two (training and validation) are kept and the third is discarded.
with gzip.open(PATH / FILENAME, "rb") as archive:
    train_split, valid_split, _ = pickle.load(archive, encoding="latin-1")

x_train, y_train = train_split
x_valid, y_valid = valid_split

784是mnist数据集每个样本的像素点个数（每张图片为 28 × 28 = 784 个像素）

In [None]:
from matplotlib import pyplot
import numpy as np

# Each sample is stored flat; reshape the first one to 28x28 to display it.
first_image = x_train[0].reshape((28, 28))
pyplot.imshow(first_image, cmap="gray")
print(x_train.shape)

<img src="./img/4.png" alt="FAO" width="790">

<img src="./img/5.png" alt="FAO" width="790">

## 注意数据需转换成tensor才能参与后续建模训练


In [None]:
import torch

# Convert all four loaded arrays to torch tensors in one pass.
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
# x_train is two-dimensional: n rows (samples) by c columns (values per sample).
n, c = x_train.shape
# Inspect the converted data: contents, shape and the label range.
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

# 定义get_data函数

In [None]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

# Mini-batch size for training; the validation loader uses twice this size.
bs = 64

# Pair every input with its label so both can be indexed together.
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

# DataLoader serves the data batch by batch; only the training set is shuffled.
train_dl = DataLoader(dataset=train_ds, batch_size=bs, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=2 * bs)

In [None]:
def get_data(train_ds, valid_ds, bs):
    """Wrap the two datasets in DataLoaders.

    Args:
        train_ds: dataset used for training; served shuffled in batches of ``bs``.
        valid_ds: dataset used for validation; served in order in batches of
            ``bs * 2``.
        bs: training batch size.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader

# 定义model结构,优化器optim

- 必须继承nn.Module且在其构造函数中需调用nn.Module的构造函数
- 无需写反向传播函数，nn.Module能够利用autograd自动实现反向传播
- Module中的可学习参数可以通过named_parameters()或者parameters()返回迭代器

In [None]:
from torch import nn

class Mnist_NN(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 256 -> 512 -> 10.

    The input's last dimension must be 784 and the output's last dimension
    is 10 (raw scores, no softmax applied).
    """

    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        self.hidden3 = nn.Linear(256, 512)
        self.out = nn.Linear(512, 10)

    # Only the forward pass is written by hand; autograd derives the backward pass.
    def forward(self, x):
        """Apply the three ReLU-activated hidden layers, then the output layer."""
        # ReLU is reached through `nn` so the class does not depend on the
        # `F` alias, which is only imported further down in the notebook.
        relu = nn.functional.relu
        x = relu(self.hidden1(x))
        x = relu(self.hidden2(x))
        x = relu(self.hidden3(x))
        return self.out(x)


In [None]:
from torch import optim
def get_model():
    """Create a fresh Mnist_NN together with its optimizer.

    Returns:
        A ``(model, optimizer)`` tuple; the optimizer is Adam with a learning
        rate of 0.001 and it updates every parameter of the model.
    """
    network = Mnist_NN()
    optimizer = optim.Adam(network.parameters(), lr=0.001)
    return network, optimizer

<img src="./img/dropout.png" alt="dropout" width="790">

In [None]:
# Instantiate the network and print its layer structure.
net = Mnist_NN()
print(net)

可以按照我们定义的层名称，打印出各层的权重和偏置项

In [None]:
# Print every learnable tensor with its name and size; the names come from
# the attribute names used in Mnist_NN (hidden1, ..., out).
for layer_name, weights in net.named_parameters():
    print(layer_name, weights, weights.size())

# 定义loss_batch

1. 计算batch的loss
2. 计算梯度
3. 执行更新
4. 清空梯度

torch.nn.functional 模块中提供了很多层和函数，后面会经常见到。
那什么时候使用nn.Module，什么时候使用nn.functional呢？一般情况下，如果模型有可学习的参数，最好用nn.Module，其他情况nn.functional相对更简单一些

In [None]:
import torch.nn.functional as F

# Loss used by the rest of the notebook: cross-entropy from torch.nn.functional.
loss_func = F.cross_entropy

In [None]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss of one batch and, if an optimizer is given, train on it.

    Args:
        model: callable that maps ``xb`` to predictions.
        loss_func: callable ``loss_func(predictions, yb)`` returning the loss.
        xb: batch of inputs.
        yb: batch of targets.
        opt: optional optimizer; when omitted the batch is only evaluated.

    Returns:
        A ``(loss_value, batch_size)`` tuple: the loss as a Python number and
        ``len(xb)``.
    """
    batch_size = len(xb)
    batch_loss = loss_func(model(xb), yb)

    # Evaluation only: no gradients are computed and nothing is updated.
    if opt is None:
        return batch_loss.item(), batch_size

    batch_loss.backward()  # compute the gradients of all parameters
    opt.step()             # update the parameters using those gradients
    opt.zero_grad()        # gradients accumulate by default, so reset them
    return batch_loss.item(), batch_size

- 一般在训练模型时加上model.train()，这样Batch Normalization会使用当前batch的统计量，Dropout也会正常生效
- 测试的时候一般选择model.eval()，这样Dropout会被关闭，Batch Normalization改用训练过程中累计的均值和方差（而不是不使用它）

# 定义fit训练函数

In [None]:
import numpy as np

# fit: the training routine
# steps: number of epochs
def fit(steps, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` for ``steps`` epochs and print the validation loss of each.

    Args:
        steps: number of epochs to run.
        model: network to train; switched between train and eval mode here.
        loss_func: loss callable forwarded to ``loss_batch``.
        opt: optimizer used for the training batches.
        train_dl: iterable of ``(xb, yb)`` training batches.
        valid_dl: iterable of ``(xb, yb)`` validation batches.
    """
    for epoch in range(steps):
        # Training phase: every batch performs one optimizer update.
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        # Validation phase: no optimizer is passed and gradient tracking is
        # disabled, so the parameters stay untouched.
        model.eval()
        with torch.no_grad():
            results = [loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
        batch_losses, batch_sizes = zip(*results)

        # Average the per-batch losses weighted by batch size, because the
        # last batch may be smaller than the others.
        weighted_total = np.sum(np.multiply(batch_losses, batch_sizes))
        val_loss = weighted_total / np.sum(batch_sizes)
        print('当前step:' + str(epoch), '验证集损失：' + str(val_loss))

# 三行搞定！

In [50]:
# Build the loaders, create the model and its optimizer, then train for 25 epochs.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(25, model, loss_func, opt, train_dl, valid_dl)

当前step:0 验证集损失：2.2794597221374513
当前step:1 验证集损失：2.2457932403564453
当前step:2 验证集损失：2.1932592220306395
当前step:3 验证集损失：2.1076241397857665
当前step:4 验证集损失：1.9733422183990479
当前step:5 验证集损失：1.7811703548431397
当前step:6 验证集损失：1.5395125650405883
当前step:7 验证集损失：1.2865864498138428
当前step:8 验证集损失：1.0694965614318848
当前step:9 验证集损失：0.9049897844314575
当前step:10 验证集损失：0.7857047427177429
当前step:11 验证集损失：0.6980518156051636
当前step:12 验证集损失：0.63350131483078
当前step:13 验证集损失：0.5833517439842224
当前step:14 验证集损失：0.5442024567604065
当前step:15 验证集损失：0.5123786268234253
当前step:16 验证集损失：0.4862543400764465
当前step:17 验证集损失：0.4646530394077301
当前step:18 验证集损失：0.44631045379638673
当前step:19 验证集损失：0.4306726737499237
当前step:20 验证集损失：0.4171604441642761
当前step:21 验证集损失：0.40569293961524966
当前step:22 验证集损失：0.3955977390766144
当前step:23 验证集损失：0.38638820929527284
当前step:24 验证集损失：0.37866533803939817


# 计算出准确度

In [None]:
# Accuracy on the validation set: share of samples whose highest-scoring
# class equals the label. One pass over the data is enough; repeating the
# same pass multiplies both counters by the same factor and leaves the
# ratio unchanged.
correct = 0
total_instance_num = 0
model.eval()  # make sure the model is in evaluation mode
with torch.no_grad():  # no gradients are needed for inference
    for xb, yb in valid_dl:
        # torch.max over dim 1 returns (values, indices); the index is the class.
        _, predicted = torch.max(model(xb), 1)
        total_instance_num += yb.size(0)
        correct += (predicted == yb).sum().item()

print("accuracy is {}%".format(correct * 100 / total_instance_num))
        