In [1]:
import torch
import torchvision
import numpy as np
import sys
import data_dj

In [2]:
# Mini-batch size, handed to the data loader here and to the training loop later.
batch_size = 256
train_iter, test_iter = data_dj.load_data_fashion_mnist(batch_size)

# Model dimensions: flattened input features and output classes.
# NOTE(review): 784 presumably corresponds to 28 * 28 pixel images and 10 to
# the number of labels produced by the loader -- confirm against data_dj.
num_inputs = 784
num_outputs = 10

# Trainable parameters: weights drawn from N(0, 0.01^2), biases start at zero.
W = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_outputs)),
    dtype=torch.float,
    requires_grad=True,
)
b = torch.zeros(num_outputs, dtype=torch.float, requires_grad=True)

In [3]:
def softmax(X):
    """Numerically stable softmax along dimension 1.

    Args:
        X: tensor with at least two dimensions; each slice along dim 1 is
            treated as one vector of logits.

    Returns:
        Tensor of the same shape as ``X`` whose entries along dim 1 are
        non-negative and, for finite input, sum to 1.
    """
    if X.shape[1] == 0:
        # max() over an empty dimension raises; there is nothing to normalise,
        # so return the (empty) exponentials directly.
        return X.exp()
    # Softmax is invariant to adding a constant to every logit in a row.
    # Subtracting the row maximum makes the largest exponent exp(0) == 1, so
    # exp() can no longer overflow to inf and produce inf / inf == NaN.
    row_max = X.max(dim=1, keepdim=True)[0]
    X_exp = (X - row_max).exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting divides every row by its own sum

In [4]:
# Sanity check: push a small random batch through softmax and print the
# probabilities next to their per-row sums (each sum should be 1).
X = torch.rand(2, 5)
X_prob = softmax(X)
print(X_prob, torch.sum(X_prob, dim=1))

tensor([[0.1508, 0.1686, 0.2177, 0.2004, 0.2625],
        [0.2867, 0.1630, 0.2112, 0.2205, 0.1185]]) tensor([1.0000, 1.0000])


In [5]:
def net(X):
    """Softmax-regression forward pass.

    Flattens ``X`` into rows of ``num_inputs`` features, applies the affine
    map defined by the module-level ``W`` and ``b``, and returns the softmax
    of the result.
    """
    flat = X.view((-1, num_inputs))
    logits = torch.mm(flat, W) + b
    return softmax(logits)

In [6]:
def cross_entropy(y_hat, y):
    """Per-example cross-entropy loss.

    Args:
        y_hat: predicted probabilities, one row per example.
        y: class index for each example (used as a gather index along dim 1).

    Returns:
        Column tensor with one row per label, holding the negative log of the
        probability that ``y_hat`` assigns to that label.
    """
    label_column = y.view(-1, 1)
    picked = torch.gather(y_hat, 1, label_column)
    return -picked.log()

In [7]:
def accuracy(y_hat, y):
    """Return the fraction of rows of ``y_hat`` whose argmax along dim 1 equals ``y``.

    The result is a plain Python float.
    """
    predicted = torch.argmax(y_hat, dim=1)
    hits = (predicted == y).float()
    return hits.mean().item()

In [8]:
def evaluate_accuracy(data_iter, net):
    """Compute the classification accuracy of ``net`` over a whole dataset.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches.
        net: callable mapping a batch ``X`` to per-class scores; the predicted
            class is the argmax along dim 1.

    Returns:
        Number of correct predictions divided by the number of labels seen,
        as a Python float.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no examples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never calls backward(), so turn autograd off: otherwise every
    # forward pass through parameters that require grad records a graph that
    # is immediately thrown away.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

In [9]:
# Training hyperparameters: number of epochs and the learning rate.
num_epochs = 5
lr = 0.1
# This function is also saved in the d2lzh package for convenient reuse later.
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    """Train ``net`` with mini-batch updates and print metrics after every epoch.

    Two update paths are supported:

    * ``optimizer`` given: its ``zero_grad()`` and ``step()`` are used.
    * ``optimizer`` is None: gradients of ``params`` are zeroed by hand and
      ``data_dj.sgd(params, lr, batch_size)`` performs the update.

    Args:
        net: callable mapping a batch ``X`` to predictions ``y_hat``.
        train_iter: iterable of ``(X, y)`` batches; iterated once per epoch.
        test_iter: iterable passed to ``evaluate_accuracy`` after each epoch.
        loss: callable ``(y_hat, y) -> tensor``; its result is summed to a
            scalar before ``backward()``.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``data_dj.sgd`` (manual path only).
        params: parameters to update on the manual path.
        lr: learning rate forwarded to ``data_dj.sgd`` (manual path only).
        optimizer: optional optimizer object; takes precedence over ``params``.

    Returns:
        None. One summary line per epoch is printed.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero the gradients before accumulating new ones.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                # Check every parameter individually: a parameter that has not
                # been through backward() yet has grad None, independently of
                # what the first parameter looks like.
                for param in params:
                    if param.grad is not None:
                        param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                data_dj.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # used by the concise softmax-regression implementation
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [10]:
# Train the from-scratch model on the manual SGD path (no optimizer object),
# updating W and b in place.
train_ch3(
    net,
    train_iter,
    test_iter,
    cross_entropy,
    num_epochs,
    batch_size,
    params=[W, b],
    lr=lr,
)

epoch 1, loss 0.7896, train acc 0.746, test acc 0.804
epoch 2, loss 0.5745, train acc 0.811, test acc 0.822
epoch 3, loss 0.5292, train acc 0.824, test acc 0.830
epoch 4, loss 0.5053, train acc 0.829, test acc 0.834
epoch 5, loss 0.4892, train acc 0.834, test acc 0.833
