In [14]:
## 要常用tensor.item()这个方法，似乎可以很容易地获取到向量的值。

# 载入库

In [1]:
import numpy as np
import torch
from torchvision.datasets import mnist # 导入 pytorch 内置的 mnist 数据

from torch import nn
from torch.autograd import Variable

# 载入原始数据以及标准化

In [2]:
def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5 # 标准化，这个技巧之后会讲到
    x = x.reshape((-1,)) # 拉平
    x = torch.from_numpy(x)
    return x

train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集，申明定义的数据变换
test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)

In [3]:
## train_set 的形状大概就像是，
# [
#     (data, label),
#     (data, label),
#     (data, label),
#     (data, label),
#     (data, label),
# ]

# 准备数据迭代器

In [4]:
from torch.utils.data import DataLoader
# 使用 pytorch 自带的 DataLoader 定义一个数据迭代器
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)

# 建立模型

In [6]:
# 使用 Sequential 定义 4 层神经网络
net = nn.Sequential(
    nn.Linear(784, 400),
    nn.ReLU(),
    nn.Linear(400, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)

In [7]:
nn.Softmax()

Softmax(dim=None)

In [8]:
net

Sequential(
  (0): Linear(in_features=784, out_features=400, bias=True)
  (1): ReLU()
  (2): Linear(in_features=400, out_features=200, bias=True)
  (3): ReLU()
  (4): Linear(in_features=200, out_features=100, bias=True)
  (5): ReLU()
  (6): Linear(in_features=100, out_features=10, bias=True)
)

# 定义loss和优化器

In [9]:
# 定义 loss 函数
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), 1e-1) # 使用随机梯度下降，学习率 0.1

# 开始训练，记录损失

In [10]:
import tqdm

In [12]:
mean_losses = []
mean_accs = []
mean_eval_losses = []
mean_eval_accs = []

for epc in tqdm.tqdm(range(20)):
    losses = []
    accs = []
    net.train() ## 开启训练模式。
    for x, y in train_data:
        y_ = net(x)
        loss = criterion(y_, y)
        optimizer.zero_grad() ## 梯度清空，不要累积。
        loss.backward() ## 链式求导，反向传播。
        optimizer.step() ## 更新参数。
        losses.append(loss.detach().numpy())
        accs.append(
            np.array(
                y == y_.max(1).indices
            ).sum()/y.shape[0]
        )
        
    mean_losses.append(np.array(losses).mean())
    mean_accs.append(np.array(accs).mean())
    

    eval_losses = []
    eval_accs = []
    net.eval() ## 关闭训练模式。
    for x, y in test_data:
        y_ = net(x)
        loss = criterion(y_, y)
        eval_losses.append(loss.detach().numpy())
        eval_accs.append(
            np.array(
                y == y_.max(1).indices
            ).sum()/y.shape[0]
        )
    mean_eval_losses.append(np.array(eval_losses).mean())
    mean_eval_accs.append(np.array(eval_accs).mean())

100%|████████████████████████████████████████████████████████████████████| 20/20 [00:37<00:00,  1.89s/it]


In [13]:
mean_eval_accs

[0.8670886075949367,
 0.9591574367088608,
 0.9418512658227848,
 0.9478837025316456,
 0.973496835443038,
 0.977254746835443,
 0.969442246835443,
 0.9763647151898734,
 0.9799248417721519,
 0.9788370253164557,
 0.9773536392405063,
 0.9792325949367089,
 0.9793314873417721,
 0.9817049050632911,
 0.9613330696202531,
 0.9824960443037974,
 0.9767602848101266,
 0.9822982594936709,
 0.9817049050632911,
 0.9838805379746836]