In [1]:
import torch
from torch import nn
from d2l import torch as d2l

# Mini-batch size handed to the d2l Fashion-MNIST loader below.
batch_size = 256
# The loader returns a (training iterator, test iterator) pair.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

In [3]:
# Implement a multilayer perceptron with a single hidden layer.

# 28*28 = 784 input pixels, 10 output classes, 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are drawn from N(0, 1) and scaled by 0.01: with 784 inputs and 256
# hidden units, unit-variance weights produce very large initial logits and an
# enormous, unstable cross-entropy loss. nn.Parameter already sets
# requires_grad=True, so it is not repeated on the inner tensor.
w1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens))
w2 = nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs))

In [4]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` holding ``max(x, 0)`` for every element.
    """
    return torch.maximum(x, torch.zeros_like(x))

In [6]:
def net(x):
    """Forward pass of the MLP: flatten -> affine -> ReLU -> affine.

    Args:
        x: Input batch; it is flattened to rows of ``num_inputs`` values.

    Returns:
        The output-layer scores, one row per sample (no softmax is applied
        here).
    """
    flat = x.reshape((-1, num_inputs))
    hidden = relu(flat @ w1 + b1)
    scores = hidden @ w2 + b2
    return scores
# Cross-entropy on the raw scores from net; 'mean' (the default) averages the
# loss over the samples of a batch.
loss = nn.CrossEntropyLoss(reduction='mean')

In [None]:
import torch
from torch import nn
from torch.utils import data
from d2l import torch as d2l

# Load the data: mini-batch size handed to the d2l Fashion-MNIST loader, which
# returns a (training iterator, test iterator) pair.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

# Model parameters: 28*28 = 784 input pixels, 10 output classes, 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are drawn from N(0, 1) and scaled by 0.01: with 784 inputs and 256
# hidden units, unit-variance weights produce very large initial logits and an
# enormous, unstable cross-entropy loss. nn.Parameter already sets
# requires_grad=True, so it is not repeated on the inner tensor.
w1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens))
w2 = nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs))

# Activation function
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` holding ``max(x, 0)`` for every element.
    """
    return torch.maximum(x, torch.zeros_like(x))

# Network definition
def net(x):
    """Forward pass of the MLP: flatten -> affine -> ReLU -> affine.

    Args:
        x: Input batch; it is flattened to rows of ``num_inputs`` values.

    Returns:
        The output-layer scores, one row per sample (no softmax is applied
        here).
    """
    flat = x.reshape((-1, num_inputs))
    hidden = relu(flat @ w1 + b1)
    scores = hidden @ w2 + b2
    return scores

# Loss function and optimizer.
# 'mean' (the default) averages the cross-entropy over the samples of a batch.
loss = nn.CrossEntropyLoss(reduction='mean')
lr = 0.1
# Plain SGD over the four hand-built parameter tensors.
params = [w1, b1, w2, b2]
optimizer = torch.optim.SGD(params, lr)

# Training function
def train_epoch(net, train_iter, loss, optimizer):
    """Run one optimisation pass over ``train_iter``.

    Args:
        net: Model to train; an ``nn.Module`` is switched to training mode,
            any other callable is used as-is.
        train_iter: Iterable yielding ``(X, y)`` mini-batches.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor; the
            bookkeeping below assumes it is the per-batch mean.
        optimizer: Optimizer whose ``zero_grad``/``step`` update the model.

    Returns:
        ``(loss_total / samples, accuracy_total / samples)`` accumulated over
        the epoch. The second value is the accuracy fraction provided
        ``d2l.accuracy`` returns the number of correct predictions.
    """
    if isinstance(net, nn.Module):
        net.train()

    # Running totals: [weighted loss, d2l.accuracy sum, sample count].
    totals = d2l.Accumulator(3)
    for X, y in train_iter:
        n_samples = X.shape[0]

        optimizer.zero_grad()
        y_hat = net(X)
        batch_loss = loss(y_hat, y)
        batch_loss.backward()
        optimizer.step()

        # Re-weight the batch loss by its size so the epoch figure is per sample.
        totals.add(batch_loss.item() * n_samples, d2l.accuracy(y_hat, y), n_samples)

    loss_total, acc_total, seen = totals[0], totals[1], totals[2]
    return loss_total / seen, acc_total / seen

# Evaluation function
def evaluate_accuracy(net, data_iter):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    Args:
        net: Model to evaluate; an ``nn.Module`` is switched to eval mode,
            any other callable is used as-is.
        data_iter: Iterable yielding ``(X, y)`` mini-batches.

    Returns:
        Correct predictions divided by total samples, a float in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    if isinstance(net, nn.Module):
        net.eval()

    num_correct, num_samples = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            y_hat = net(X)
            # Two-dimensional output with several columns holds per-class
            # scores; reduce it to the predicted class index.
            if y_hat.dim() > 1 and y_hat.shape[1] > 1:
                y_hat = y_hat.argmax(dim=1)
            # Accumulate raw COUNTS of correct predictions. The ratio is taken
            # once at the end, so the count must not be scaled by the batch
            # size here (doing so inflates the result far above 1).
            num_correct += float((y_hat.type(y.dtype) == y).sum())
            num_samples += y.numel()
    return num_correct / num_samples

# Training loop
def train(net, train_iter, test_iter, loss, num_epochs, optimizer):
    """Train ``net`` for ``num_epochs`` epochs, plotting and printing progress.

    After every epoch the training loss, training accuracy and test accuracy
    are added to a ``d2l.Animator`` plot and printed.

    Args:
        net: Model passed through to ``train_epoch`` / ``evaluate_accuracy``.
        train_iter: Iterable of training mini-batches.
        test_iter: Iterable of test mini-batches.
        loss: Loss callable passed through to ``train_epoch``.
        num_epochs: Number of epochs to run; must be at least 1.
        optimizer: Optimizer passed through to ``train_epoch``.

    Returns:
        ``(train_loss, train_acc, test_acc)`` from the final epoch.

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1 (there would be no
            final-epoch metrics to return).
    """
    # Fail early with a clear message instead of reaching the return statement
    # with unbound metric variables.
    if num_epochs < 1:
        raise ValueError(f'num_epochs must be at least 1, got {num_epochs}')

    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    # Epochs are numbered from 1 to match the x-axis and the printed log.
    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = train_epoch(net, train_iter, loss, optimizer)
        test_acc = evaluate_accuracy(net, test_iter)

        animator.add(epoch, [train_loss, train_acc, test_acc])

        print(f'epoch {epoch}, train loss {train_loss:.3f}, '
              f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')

    return train_loss, train_acc, test_acc

# Run training
num_epochs = 10
train_loss, train_acc, test_acc = train(
    net, train_iter, test_iter, loss, num_epochs, optimizer
)

# Summarise the metrics returned for the last epoch.
print(
    f'Final results - train loss: {train_loss:.3f}, '
    f'train acc: {train_acc:.3f}, test acc: {test_acc:.3f}'
)

# Predict and visualise
def predict(net, test_iter, n=10):
    """Show up to ``n`` images from the first test batch with true/predicted labels.

    Args:
        net: Callable mapping an image batch to per-class scores.
        test_iter: Iterable yielding ``(X, y)`` mini-batches; only the first
            batch is used.
        n: Maximum number of images to display. If the batch holds fewer
            samples, all of them are shown.

    Raises:
        StopIteration: If ``test_iter`` yields no batch.
    """
    X, y = next(iter(test_iter))
    # Never request more images than the batch contains; otherwise the reshape
    # to (n, 28, 28) below fails.
    n = min(n, X.shape[0])
    X, y = X[:n], y[:n]

    # Inference only: skip autograd bookkeeping, and run the model just on the
    # samples that will actually be displayed.
    with torch.no_grad():
        y_hat = net(X)

    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(y_hat.argmax(axis=1))
    titles = [f'true: {true}\npred: {pred}' for true, pred in zip(trues, preds)]
    # Images are drawn as a single row of n 28x28 pictures.
    d2l.show_images(X.reshape((n, 28, 28)), 1, n, titles=titles)

# Display the first ten test images with their true and predicted labels.
predict(net, test_iter, n=10)