多层感知机和softmax回归唯一的不同在于，我们多加了一个全连接层作为隐藏层。
它的隐藏单元个数为256，并使用ReLU函数作为激活函数。

In [1]:
# Layer widths consumed by LinearNet when it is constructed.
num_inputs = 784    # features fed to the hidden layer (784 = 28 * 28)
num_hiddens = 256   # units in the single hidden layer
num_outputs = 10    # values produced by the output layer

# 定义模型
import mtorch
import torch

from mtorch import nn

class LinearNet(torch.nn.Module):
    """Single-hidden-layer perceptron: flatten -> linear -> ReLU -> linear.

    The layer widths are read from the module-level ``num_inputs``,
    ``num_hiddens`` and ``num_outputs`` at construction time.
    """

    def __init__(self) -> None:
        super().__init__()
        # Sub-modules are registered in the same order forward() applies them.
        # NOTE(review): mtorch.nn.Flatten is a project helper; presumably it
        # collapses every sample to a 1-D feature vector -- confirm.
        self.flatten = mtorch.nn.Flatten()
        self.hidden = torch.nn.Linear(in_features=num_inputs, out_features=num_hiddens)
        self.relu = torch.nn.ReLU()
        self.output = torch.nn.Linear(in_features=num_hiddens, out_features=num_outputs)

    def forward(self, x):
        """Run ``x`` through each stage in turn and return the output layer's result.

        No softmax is applied here; the raw output-layer values are returned.
        """
        for stage in (self.flatten, self.hidden, self.relu, self.output):
            x = stage(x)
        return x
# Build the model instance used by the rest of the notebook.
net = LinearNet()

# Initialise parameters: every tensor yielded by net.parameters() is
# overwritten in place with draws from a normal distribution
# (mean 0, standard deviation 0.01).
for tensor in net.parameters():
    torch.nn.init.normal_(tensor, mean=0, std=0.01)

In [2]:
# 训练模型
import torchvision
# Download the dataset (training split first, then the test split).
# Both splits are stored under the same root directory, and ToTensor()
# converts each sample to a torch tensor as it is loaded.
mnist_train = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    transform=torchvision.transforms.ToTensor(),
    download=True,
    train=True,
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    transform=torchvision.transforms.ToTensor(),
    download=True,
    train=False,
)
# Final expression of the cell: shows the size of each split.
(len(mnist_train), len(mnist_test))

(60000, 10000)

In [3]:
# 训练
from mtorch import nn
# Wrap the network in mtorch's Processor.
# NOTE(review): Processor is a project helper; presumably a Keras-style
# compile/fit training wrapper -- confirm against the mtorch sources.
processor = mtorch.nn.Processor(net)

# Cross-entropy loss, optimised with plain SGD (learning rate 0.5) over
# all of the network's parameters.
processor.compile(
    loss=torch.nn.CrossEntropyLoss(),
    optimizer=torch.optim.SGD(net.parameters(), lr=0.5),
)

# Train on the training split for 5 epochs with batches of 256 samples,
# passing the test split as validation data.
processor.fit(
    mnist_train,
    validation_data=mnist_test,
    epochs=5,
    batch_size=256,
)

epoch 1, loss 0.0032, train acc 0.700, test acc 0.675
epoch 2, loss 0.0019, train acc 0.817, test acc 0.816
epoch 3, loss 0.0017, train acc 0.841, test acc 0.830
epoch 4, loss 0.0015, train acc 0.856, test acc 0.762
epoch 5, loss 0.0015, train acc 0.863, test acc 0.856
