In [12]:
import numpy as np
import struct

In [13]:
def load_idx_images(path):
    """Read an IDX image file and return its pixels as a 2-D float32 matrix.

    The first 16 bytes are unpacked as four big-endian unsigned 32-bit
    integers: magic number, image count, row count and column count. The
    remainder of the file is interpreted as unsigned bytes.

    Args:
        path: Location of the IDX image file.

    Returns:
        Array of shape ``(num, rows * cols)`` and dtype ``float32`` in which
        every byte has been divided by 255.0.

    Raises:
        AssertionError: If the magic number in the header is not 2051.
    """
    with open(path, 'rb') as fh:
        header = fh.read(16)
        magic, num, rows, cols = struct.unpack(">IIII", header)
        assert magic == 2051, f"Magic number mismatch, got {magic}"
        payload = fh.read()

    # One row per image: each rows x cols picture is flattened into a vector.
    pixels = np.frombuffer(payload, dtype=np.uint8)
    flattened = pixels.reshape(num, rows * cols)
    # Convert to float32 first so the scaled result stays float32.
    return flattened.astype(np.float32) / 255.0

In [14]:
def load_idx_labels(path):
    """Read an IDX label file and return the labels as a 1-D uint8 array.

    The first 8 bytes are unpacked as two big-endian unsigned 32-bit
    integers: magic number and label count. Every remaining byte is one
    label.

    Args:
        path: Location of the IDX label file.

    Returns:
        1-D ``uint8`` array holding exactly as many labels as the header
        declares.

    Raises:
        AssertionError: If the magic number in the header is not 2049.
        ValueError: If the number of label bytes in the file differs from
            the count declared in the header (truncated or padded file).
    """
    with open(path, "rb") as f:
        magic, num = struct.unpack(">II", f.read(8))
        assert magic == 2049, f"Magic number mismatch, got {magic}"
        data = np.frombuffer(f.read(), dtype=np.uint8)

    # Without this check a damaged file would silently yield the wrong
    # number of labels and only fail (or misalign) much later.
    if data.size != num:
        raise ValueError(
            f"Label count mismatch: header declares {num}, file contains {data.size}"
        )
    return data

In [15]:
# Forward pass: compute the pre-activation scores Z.
def forward(X: np.ndarray, W: np.ndarray, b: np.ndarray):
    """Apply the affine map ``X.dot(W) + b``.

    Args:
        X: Input array; its ``dot`` method is called with ``W``.
        W: Weight array multiplied from the right.
        b: Bias added to the product (NumPy broadcasting applies).

    Returns:
        The result of ``X.dot(W) + b``.
    """
    projected = X.dot(W)
    return projected + b

In [16]:
# Cross-entropy loss, variant that takes integer class labels (y_int).
def cross_entropy_from_int(Y_hat: np.ndarray, y_int: np.ndarray, eps: float = 1e-12):
    """Mean negative log-probability of the labelled class.

    Args:
        Y_hat: 2-D array indexed as ``Y_hat[row, class]``; the value picked
            for each row is passed to ``log``.
        y_int: Integer class index for every row of ``Y_hat``.
        eps: Small float added to each picked value before the logarithm so
            that a value of exactly 0 does not produce ``-inf``.

    Returns:
        ``-mean(log(Y_hat[i, y_int[i]] + eps))`` over all rows ``i``.
    """
    # One index per row of Y_hat.
    rows = np.arange(Y_hat.shape[0])
    # Advanced indexing pairs row i with column y_int[i], i.e. it picks the
    # value stored for the labelled class of every row.
    p_true = Y_hat[rows, y_int]
    # Add eps, take the log, then return the negated mean.
    loss = -np.mean(np.log(p_true + eps))
    return loss

In [17]:
# Derivative of the loss with respect to Z, the starting point for the
# parameter gradients.
def d_loss_d_Z(Y_hat: np.ndarray, Y_onehot: np.ndarray, B: int):
    """Return ``(Y_hat - Y_onehot) / B`` computed on a copy of ``Y_hat``.

    Args:
        Y_hat: Array that is copied and then updated in place; the input
            itself is left untouched.
        Y_onehot: Array subtracted from the copy.
        B: Divisor applied after the subtraction (presumably the batch
            size -- confirm against the caller).

    Returns:
        A new array with the dtype of ``Y_hat``, because both operations
        write into the copy.
    """
    grad = Y_hat.copy()
    # Writing into `grad` keeps its dtype, exactly like `-=` and `/=` would.
    np.subtract(grad, Y_onehot, out=grad)
    np.divide(grad, B, out=grad)
    return grad

In [18]:
# Softmax: turn the score matrix Z into a matrix of probabilities.
def softMax(Z: np.ndarray):
    """Row-wise softmax along axis 1.

    Args:
        Z: Score array; the reduction runs over axis 1, so it needs at
            least two dimensions.

    Returns:
        Array of the same shape whose entries along axis 1 are the
        exponentials of the shifted scores divided by their sum.
    """
    # Subtracting each row's maximum leaves the result unchanged but keeps
    # the exponentials from overflowing.
    row_peak = np.max(Z, axis=1, keepdims=True)
    weights = np.exp(Z - row_peak)
    return weights / weights.sum(axis=1, keepdims=True)

In [19]:
# Evaluate the saved parameters (W, b) on the IDX image/label files below
# and report cross-entropy loss and accuracy.
images_path = "./t10k-images.idx3-ubyte"
labels_path = "./t10k-labels.idx1-ubyte"

X = load_idx_images(images_path)
Y = load_idx_labels(labels_path)
# Every image needs exactly one label; fail early with a clear message
# instead of an indexing error deep inside the loss computation.
assert X.shape[0] == Y.shape[0], (
    f"Image/label count mismatch: {X.shape[0]} images vs {Y.shape[0]} labels"
)

W = np.load("./W.npy")
b = np.load("./b.npy")

Z = forward(X, W, b)
probs = softMax(Z)
loss = cross_entropy_from_int(probs, Y)
# Fraction of rows whose highest-probability class equals the label.
acc = (probs.argmax(axis=1) == Y).mean()
# ".4f" prints four decimal places; a bare "4f" would only set a minimum
# field width of 4 and keep the default six decimals.
print(f"loss: {loss:.4f}, acc = {acc:.4f}")

loss: 0.273866, acc = 0.923600
