In [1]:
import torchvision
# 下载数据集
mnist_train = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train=True, download=True,
    transform=torchvision.transforms.ToTensor()     # 自动转为torch张量
)
mnist_test = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train=False, download=True,
    transform=torchvision.transforms.ToTensor()     # 自动转为torch张量
)
len(mnist_train), len(mnist_test)

import torch
import torch.utils.data

batch_size = 256

train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size,
    shuffle=True,
    num_workers=0       # 开启num_workers个线程
)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size,
    shuffle=True,
    num_workers=0       # 开启num_workers个线程
)



In [2]:
# 定义模型
import torch

class LinearNet(torch.nn.Module):
    def __init__(self, num_inputs, num_outputs) -> None:
        super(LinearNet, self).__init__()
        self.linear = torch.nn.Linear(num_inputs, num_outputs)
    def forward(self, x):
        return self.linear(x.view(x.shape[0], -1))

num_inputs, num_outputs = 784, 10
net = LinearNet(num_inputs, num_outputs)

In [3]:
# 随机化参数
torch.nn.init.normal_(net.linear.weight, mean = 0, std = 0.01)
torch.nn.init.constant_(net.linear.bias, val = 0)

loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [4]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for x, y in data_iter:
        acc_sum += (net(x).argmax(dim = 1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
# 训练模型
epochs = 5
def train(net, train_iter, test_iter, loss, epochs):
    for epoch in range(epochs):
        train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
        for x, y in train_iter:
            y_hat = net(x)
            l = loss(y_hat, y).sum()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_loss_sum += l.sum().item()
            train_acc_sum += (y_hat.argmax(dim = 1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' %
                (epoch+1, train_loss_sum/n, train_acc_sum/n, test_acc))

train(net, train_iter, test_iter, loss, epochs)


epoch 1, loss 0.0031, train acc 0.750, test acc 0.774
epoch 2, loss 0.0022, train acc 0.813, test acc 0.811
epoch 3, loss 0.0021, train acc 0.827, test acc 0.821
epoch 4, loss 0.0020, train acc 0.833, test acc 0.791
epoch 5, loss 0.0019, train acc 0.837, test acc 0.819
