In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import torch.nn as nn
%matplotlib inline

# 获得文件

In [18]:
from pathlib import Path
import requests

DATA_PATH = Path("dataset")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
        content = requests.get(URL + FILENAME).content
        (PATH / FILENAME).open("wb").write(content)

In [60]:
import pickle
import gzip

with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
        ((X_train, y_train), (X_test, y_test), _) = pickle.load(f, encoding="latin-1")

# 处理训练数据

In [61]:
def mapTorch(data):
    return torch.tensor(data, dtype=torch.float32, requires_grad=True)

# 改为 torch 格式
(X_train, y_train, X_test, y_test) = map(
    mapTorch, (X_train, y_train, X_test, y_test)
)

# one-hot 编码
# y_train = F.one_hot(y_train.type(torch.int64))
# y_test = F.one_hot(y_test.type(torch.int64))
y_train[:3]

tensor([5., 0., 4.], grad_fn=<SliceBackward>)

# 创建网络

In [78]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
            nn.Softmax(dim=1)
        )
    
    def forward(self,X):
        return self.layers(X)

In [84]:
# 类实例化
input_size = 784
hidden_size = 128
output_size = 10
net = Net(input_size, hidden_size, output_size)

# 损失函数
losses = []
cost = F.cross_entropy

# 优化器
learning_rate = 0.01
optimizer = torch.optim.Adam(net.parameters(), learning_rate)

# 循环和batch_size
train_size = X_train.shape[0]
n_iters = 2000
batch_size = 200
iters_per_epoch = max(train_size / batch_size, 1)

In [85]:
# 训练
epoch=1
for iter in range(n_iters):
    # 训练的batch
    batch_mask = np.random.choice(train_size, batch_size)
    X_batch = X_train[batch_mask]
    y_batch = y_train[batch_mask]

    # forward
    out = net.forward(X_batch)

    # loss
    loss = cost(out,y_batch.long())  # 预测的格式 out: [0.6,0.2,0.01,...]，真实值不能是ont-hot编码，要是一维的数据类似[1,2,4,1,...]，并且是长整型，函数会自动转换

    # backward
    loss.backward()

    # 更新权重参数
    optimizer.step()

    # 梯度清零
    optimizer.zero_grad()

    losses.append(loss.data.numpy()) 

    if not (iter+1) % iters_per_epoch:
        print(f"the no.{iter+1} epoch: {epoch}, loss: {loss}")
        epoch += 1

the no.250 epoch: 1, loss: 1.6199427843093872
the no.500 epoch: 2, loss: 1.5965991020202637
the no.750 epoch: 3, loss: 1.5052868127822876
the no.1000 epoch: 4, loss: 1.5006613731384277
the no.1250 epoch: 5, loss: 1.5021178722381592
the no.1500 epoch: 6, loss: 1.5077366828918457
the no.1750 epoch: 7, loss: 1.4978522062301636
the no.2000 epoch: 8, loss: 1.4860246181488037


# 在测试集的结果

In [111]:
# 预测测试集结果
y_pre = net.forward(X_test).data.numpy()

In [112]:
# one-hot --> normal
y_pre = np.argmax(y_pre,axis=1)

from sklearn.metrics import precision_score
precision_score(y_pre, y_test.data.numpy(), average="macro")

0.9641686703108941

# 在训练集的结果

In [114]:
y_pre = net.forward(X_train).data.numpy()
y_pre = np.argmax(y_pre,axis=1)

precision_score(y_pre, y_train.data.numpy(), average="macro")

0.9700150059742081