In [5]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms

# Model definition: a multilayer perceptron with a single hidden layer.
# 784 inputs corresponds to a flattened 28x28 single-channel image and 10
# outputs to one score per class (presumably sized for the FashionMNIST data
# loaded later in this cell).
num_inputs, num_hiddens, num_outputs = 784, 256, 10
net = nn.Sequential(
    nn.Flatten(),                         # layer 1: flatten each sample to a vector
    nn.Linear(num_inputs, num_hiddens),   # layer 2: input -> hidden
    nn.ReLU(),                            # layer 3: non-linearity
    nn.Linear(num_hiddens, num_outputs),  # layer 4: hidden -> class scores
)

# Weight initialisation
def init_weights(m):
    """Initialise the weights of a linear layer from N(0, 0.01**2).

    Intended to be passed to ``nn.Module.apply``, which calls it once for
    every submodule. Modules that are not linear layers are left untouched,
    and only ``m.weight`` is re-initialised; the bias keeps whatever value it
    already had.

    Args:
        m: The module being visited.
    """
    # isinstance (rather than an exact type comparison) so that subclasses of
    # nn.Linear are initialised as well.
    if isinstance(m, nn.Linear):
        # Other common schemes include uniform, Xavier and Kaiming
        # initialisation; a small-variance normal is used here.
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)  # run init_weights on net and each of its submodules

# Hyperparameters
batch_size = 256
lr = 0.13
num_epochs = 20

# Loss function and optimiser (plain SGD over all model parameters).
loss_fn = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)

# Data loading
# Images are converted to tensors with ToTensor; no other preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# Arguments shared by the training and test splits; the data is downloaded
# into ./data if it is not already there.
dataset_kwargs = dict(root="./data", transform=transform, download=True)
train_dataset = torchvision.datasets.FashionMNIST(train=True, **dataset_kwargs)
test_dataset = torchvision.datasets.FashionMNIST(train=False, **dataset_kwargs)

# Only the training batches are shuffled; the test set is read in a fixed order.
train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Evaluation helper
# Accuracy is one of the most common metrics for classification tasks: it
# measures how often the model's predictions agree with the true labels.
def evaluate_accuracy(net, data_iter, device=None):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        net: The model to evaluate. It is switched to evaluation mode and is
            still in that mode when the function returns.
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds the
            class index of each sample.
        device: Device each batch is moved to before the forward pass. When
            omitted, the device of the model's first parameter is used (CPU
            if the model has no parameters).

    Returns:
        The accuracy as a float in ``[0, 1]``, or ``0.0`` when ``data_iter``
        yields no samples.
    """
    if device is None:
        # Follow the model: evaluate on whatever device its parameters live on.
        first_param = next(iter(net.parameters()), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    net.eval()  # evaluation mode (disables training-time Dropout/BatchNorm behaviour)
    correct, total = 0, 0
    with torch.no_grad():  # no backward pass is needed while evaluating
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)  # move the batch to the target device
            y_hat = net(X)  # per-class scores for every sample in the batch
            # The predicted class is the index of the highest score.
            correct += (y_hat.argmax(dim=1) == y).sum().item()
            total += y.size(0)

    if total == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError on an empty iterator.
        return 0.0
    return correct / total

# Training loop
def train(net, train_iter, test_iter, loss_fn, num_epochs, trainer, device=None):
    """Train ``net`` for ``num_epochs`` epochs and print metrics after each one.

    Args:
        net: The model to train; it is moved to ``device``.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable of batches passed to ``evaluate_accuracy`` after
            every epoch.
        loss_fn: Callable returning the loss for ``(predictions, labels)``.
        num_epochs: Number of passes over ``train_iter``.
        trainer: Optimiser used to update the model's parameters.
        device: Device to train on. Defaults to CUDA when available,
            otherwise CPU.
    """
    if device is None:
        backend = 'cuda' if torch.cuda.is_available() else 'cpu'
        device = torch.device(backend)
    net.to(device)

    for epoch in range(num_epochs):
        net.train()  # back to training mode; evaluation below switches it off
        # Per-epoch running totals: summed loss, correct predictions, samples seen.
        train_loss = 0.0
        train_acc = 0.0
        total = 0

        for features, labels in train_iter:
            features = features.to(device)
            labels = labels.to(device)
            n_samples = labels.size(0)

            trainer.zero_grad()
            logits = net(features)
            batch_loss = loss_fn(logits, labels)
            batch_loss.backward()
            trainer.step()  # optimiser update (for plain SGD: w <- w - lr * grad)

            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample average (assumes loss_fn returns a batch mean, as
            # nn.CrossEntropyLoss does by default).
            train_loss += batch_loss.item() * n_samples
            hits = logits.argmax(dim=1) == labels
            train_acc += hits.sum().item()
            total += n_samples

        test_acc = evaluate_accuracy(net, test_iter, device)
        print(f'Epoch {epoch+1}, Loss: {train_loss/total:.3f}, '
              f'Train Acc: {train_acc/total:.3f}, Test Acc: {test_acc:.3f}')

# Start training (device is left unset, so train() picks CUDA when available).
train(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss_fn=loss_fn,
    num_epochs=num_epochs,
    trainer=trainer,
)


Epoch 1, Loss: 0.978, Train Acc: 0.656, Test Acc: 0.741
Epoch 2, Loss: 0.566, Train Acc: 0.800, Test Acc: 0.802
Epoch 3, Loss: 0.495, Train Acc: 0.825, Test Acc: 0.820
Epoch 4, Loss: 0.457, Train Acc: 0.840, Test Acc: 0.817
Epoch 5, Loss: 0.428, Train Acc: 0.848, Test Acc: 0.837
Epoch 6, Loss: 0.410, Train Acc: 0.854, Test Acc: 0.835
Epoch 7, Loss: 0.396, Train Acc: 0.859, Test Acc: 0.836
Epoch 8, Loss: 0.382, Train Acc: 0.863, Test Acc: 0.841
Epoch 9, Loss: 0.370, Train Acc: 0.867, Test Acc: 0.803
Epoch 10, Loss: 0.359, Train Acc: 0.872, Test Acc: 0.857
Epoch 11, Loss: 0.354, Train Acc: 0.873, Test Acc: 0.848
Epoch 12, Loss: 0.344, Train Acc: 0.877, Test Acc: 0.842
Epoch 13, Loss: 0.337, Train Acc: 0.879, Test Acc: 0.863
Epoch 14, Loss: 0.330, Train Acc: 0.882, Test Acc: 0.853
Epoch 15, Loss: 0.325, Train Acc: 0.883, Test Acc: 0.864
Epoch 16, Loss: 0.319, Train Acc: 0.885, Test Acc: 0.871
Epoch 17, Loss: 0.314, Train Acc: 0.887, Test Acc: 0.863
Epoch 18, Loss: 0.308, Train Acc: 0.889,