## 准备数据

In [1]:
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 禁用PyTorch的梯度跟踪
torch.set_grad_enabled(False)

# 设备配置（仅使用CPU）
device = torch.device('cpu')

# 数据预处理（转换为numpy数组）
def to_numpy(dataset):
    data = dataset.data.numpy().astype(np.float32) / 255.0
    labels = dataset.targets.numpy()
    one_hot = np.zeros((labels.size, 10), dtype=np.float32)
    one_hot[np.arange(labels.size), labels] = 1.0
    return data.reshape(-1, 28*28), one_hot

# 加载数据集
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

# 转换为numpy数组
train_data, train_labels = to_numpy(train_dataset)
test_data, test_labels = to_numpy(test_dataset)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw



## 手动实现全连接层

In [2]:
class LinearLayer:
    def __init__(self, input_dim, output_dim):
        self.W = np.random.randn(input_dim, output_dim) * 0.01
        self.b = np.zeros((1, output_dim))
        self.cache = None
    
    def forward(self, x):
        out = np.dot(x, self.W) + self.b
        self.cache = x  # 保存输入用于反向传播
        return out
    
    def backward(self, dout):
        x = self.cache
        dW = np.dot(x.T, dout)
        db = np.sum(dout, axis=0, keepdims=True)
        dx = np.dot(dout, self.W.T)
        return dx, dW, db

# 手动实现ReLU
class ReLU:
    def __init__(self):
        self.cache = None
    
    def forward(self, x):
        self.cache = x
        return np.maximum(0, x)
    
    def backward(self, dout):
        x = self.cache
        dx = dout * (x > 0)
        return dx

# 手动实现Softmax+CrossEntropy
class SoftmaxCELoss:
    def __init__(self):
        self.cache = None
    
    def forward(self, x, y_true):
        # Softmax
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        probs = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        
        # Cross-Entropy Loss
        loss = -np.sum(y_true * np.log(probs + 1e-12)) / x.shape[0]
        self.cache = (probs, y_true)
        return loss
    
    def backward(self):
        probs, y_true = self.cache
        dx = (probs - y_true) / probs.shape[0]
        return dx

## 建立模型

In [3]:
# 模型定义
class ManualNN:
    def __init__(self):
        self.fc1 = LinearLayer(28*28, 100)
        self.relu = ReLU()
        self.fc2 = LinearLayer(100, 10)
        self.loss = SoftmaxCELoss()
    
    def forward(self, x, y_true):
        # 前向传播
        x = self.fc1.forward(x)
        x = self.relu.forward(x)
        x = self.fc2.forward(x)
        loss = self.loss.forward(x, y_true)
        return loss
    
    def backward(self):
        # 反向传播
        dx = self.loss.backward()
        dx, dW2, db2 = self.fc2.backward(dx)
        dx = self.relu.backward(dx)
        dx, dW1, db1 = self.fc1.backward(dx)
        return dW1, db1, dW2, db2


## 实际训练

In [4]:
# 训练参数
model = ManualNN()
learning_rate = 1e-5
batch_size = 64
n_epochs = 50

# 训练循环
for epoch in range(n_epochs):
    # 随机打乱数据
    permutation = np.random.permutation(train_data.shape[0])
    
    # Mini-batch训练
    for i in range(0, train_data.shape[0], batch_size):
        # 获取batch数据
        indices = permutation[i:i+batch_size]
        x_batch = train_data[indices]
        y_batch = train_labels[indices]
        
        # 前向传播
        loss = model.forward(x_batch, y_batch)
        
        # 反向传播计算梯度
        dW1, db1, dW2, db2 = model.backward()
        
        # 手动更新参数
        model.fc1.W -= learning_rate * dW1
        model.fc1.b -= learning_rate * db1
        model.fc2.W -= learning_rate * dW2
        model.fc2.b -= learning_rate * db2
    
    # 每个epoch计算测试集准确率
    # 前向传播计算预测结果
    scores = model.fc2.forward(
        model.relu.forward(
            model.fc1.forward(test_data)
        )
    )
    preds = np.argmax(scores, axis=1)
    truth = np.argmax(test_labels, axis=1)
    accuracy = np.mean(preds == truth)
    
    print(f"Epoch {epoch+1}/{n_epochs} | Loss: {loss:.4f} | Test Acc: {accuracy*100:.2f}%")

Epoch 1/50 | Loss: 2.3040 | Test Acc: 10.96%
Epoch 2/50 | Loss: 2.3043 | Test Acc: 11.23%
Epoch 3/50 | Loss: 2.3039 | Test Acc: 11.50%
Epoch 4/50 | Loss: 2.3035 | Test Acc: 11.78%
Epoch 5/50 | Loss: 2.3025 | Test Acc: 12.03%
Epoch 6/50 | Loss: 2.3022 | Test Acc: 12.23%
Epoch 7/50 | Loss: 2.3007 | Test Acc: 12.48%
Epoch 8/50 | Loss: 2.3024 | Test Acc: 12.82%
Epoch 9/50 | Loss: 2.3051 | Test Acc: 13.19%
Epoch 10/50 | Loss: 2.3041 | Test Acc: 13.30%
Epoch 11/50 | Loss: 2.3022 | Test Acc: 13.63%
Epoch 12/50 | Loss: 2.3040 | Test Acc: 13.92%
Epoch 13/50 | Loss: 2.3029 | Test Acc: 14.21%
Epoch 14/50 | Loss: 2.3026 | Test Acc: 14.58%
Epoch 15/50 | Loss: 2.3001 | Test Acc: 14.81%
Epoch 16/50 | Loss: 2.3011 | Test Acc: 14.99%
Epoch 17/50 | Loss: 2.3017 | Test Acc: 15.27%
Epoch 18/50 | Loss: 2.3011 | Test Acc: 15.53%
Epoch 19/50 | Loss: 2.3013 | Test Acc: 15.99%
Epoch 20/50 | Loss: 2.3013 | Test Acc: 16.38%
Epoch 21/50 | Loss: 2.3019 | Test Acc: 16.74%
Epoch 22/50 | Loss: 2.3010 | Test Acc: 17.1