# 多层感知机从零开始实现

In [None]:
%matplotlib inline

In [None]:
# Number of examples per mini-batch handed to the data loader.
BATCH_SIZE = 256

# Layer widths of the MLP: input features, hidden units, output scores.
NUM_INPUTS = 784
NUM_HIDDENS = 256
NUM_OUTPUTS = 10

# Optimisation settings: epoch count and SGD learning rate.
NUM_EPOCHS = 10
LR = 0.1

## Step1 数据集

In [None]:
from datasets import load_data_fashion_mnist

# Obtain the training and test data sources for a batch size of BATCH_SIZE.
# NOTE(review): load_data_fashion_mnist is a project helper (datasets module);
# presumably it returns iterable loaders over Fashion-MNIST batches - confirm.
train_iter, test_iter = load_data_fashion_mnist(BATCH_SIZE)

## Step2 定义模型

In [None]:
import torch
from torch import nn

# Hand-built trainable parameters of a one-hidden-layer MLP.
# Weights are sampled from a standard normal and scaled by 0.01; biases start
# at zero. nn.Parameter marks its tensor as requiring gradients by default, so
# the temporaries passed in do not need requires_grad=True themselves (on the
# weights it would only attach autograd history to the throwaway `* 0.01`
# result).
W1 = nn.Parameter(torch.randn(NUM_INPUTS, NUM_HIDDENS) * 0.01)
b1 = nn.Parameter(torch.zeros(NUM_HIDDENS))

W2 = nn.Parameter(torch.randn(NUM_HIDDENS, NUM_OUTPUTS) * 0.01)
b2 = nn.Parameter(torch.zeros(NUM_OUTPUTS))

# Collected in forward-pass order; this is the list handed to the optimizer.
params = [W1, b1, W2, b2]

In [None]:
def relu(X):
    """Return the element-wise ReLU of ``X``, i.e. ``max(X, 0)``.

    Args:
        X: Input tensor.

    Returns:
        A new tensor shaped like ``X`` whose negative entries are replaced
        by zero; non-negative entries are passed through unchanged.
    """
    zeros = torch.zeros_like(X)
    return torch.maximum(X, zeros)

In [None]:
def net(X):
    """Forward pass of the hand-built MLP: flatten -> linear -> ReLU -> linear.

    Args:
        X: Batch of inputs. It is reshaped to ``(-1, NUM_INPUTS)``, so its
            element count must be a multiple of ``NUM_INPUTS``.

    Returns:
        The raw (un-normalised) output scores, one row per flattened example
        and ``NUM_OUTPUTS`` columns.
    """
    flat = X.reshape(-1, NUM_INPUTS)
    hidden = relu(torch.matmul(flat, W1) + b1)
    scores = torch.matmul(hidden, W2) + b2
    return scores

## Step3 损失函数

In [None]:
# Cross-entropy loss on raw scores. reduction='none' keeps one loss value per
# example instead of averaging or summing over the batch.
# NOTE(review): the training helper presumably does the reduction - confirm.
loss = nn.CrossEntropyLoss(reduction='none')

## Step4 训练

In [None]:
from visualize import Animator
from utils import Accumulator
from trainer import train_v1

In [None]:
# Plain SGD over the hand-built parameter list with learning rate LR.
updater = torch.optim.SGD(params, lr=LR)

# Train the from-scratch model.
# NOTE(review): train_v1 is a project helper (trainer module); presumably it
# trains for NUM_EPOCHS epochs and evaluates on test_iter - confirm.
train_v1(net, train_iter, test_iter, loss, NUM_EPOCHS, updater)

# 多层感知机 PyTorch 实现

In [None]:
# The same flatten -> linear -> ReLU -> linear architecture, built from
# torch.nn modules. Layer sizes come from the shared constants (instead of
# repeating 784 / 256 / 10) so this model cannot drift from the hand-built one.
# Note: this rebinds the name `net`, replacing the from-scratch function.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(NUM_INPUTS, NUM_HIDDENS),
    nn.ReLU(),
    nn.Linear(NUM_HIDDENS, NUM_OUTPUTS),
)

def init_weights(m):
    """Re-initialise the weight of a linear layer from N(0, 0.01**2).

    Intended to be passed to ``nn.Module.apply``. Modules that are not linear
    layers are left untouched, and so is the bias of a linear layer.

    Args:
        m: Any ``nn.Module``.

    Returns:
        None. The weight tensor is modified in place.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)

# Module.apply calls init_weights on every submodule of net and on net itself,
# so each nn.Linear layer gets its weight re-initialised.
net.apply(init_weights)

In [None]:
# Second run: train the nn.Sequential model with freshly created data sources,
# loss and optimizer.
# NOTE(review): re-creating train_iter/test_iter looks redundant with the
# earlier load - presumably done so this section can run on its own; confirm.
train_iter, test_iter = load_data_fashion_mnist(BATCH_SIZE)
# reduction='none' keeps one loss value per example (no batch averaging).
loss = nn.CrossEntropyLoss(reduction='none')
# SGD over all parameters registered in the nn.Sequential model.
trainer = torch.optim.SGD(net.parameters(), lr=LR)

# NOTE(review): train_v1 is a project helper; its behaviour is not visible here.
train_v1(net, train_iter, test_iter, loss, NUM_EPOCHS, trainer)