# 1多层感知机的实现

# 1.1 读取数据

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

batch_size = 256
num_workers=0

mnist_train = torchvision.datasets.FashionMNIST(root='~/Desktop/Datasets/FashionMNIST', train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Desktop/Datasets/FashionMNIST', train=False, download=False, transform=transforms.ToTensor())

train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

## 1.2定义模型和参数

In [2]:
import numpy as np
num_inputs, num_outputs, num_hiddens = 784, 10, 256

W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)


params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)

## 1.3 定义激活函数

In [3]:
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))

## 1.4 定义模型

In [9]:
def net(X):
    X = X.view((-1,num_inputs))
    H = relu(torch.mm(X,W1) + b1)
    return torch.mm(H,W2) + b2

## 1.5定义损失函数

In [5]:
loss = torch.nn.CrossEntropyLoss()

## 1.6 小批量梯度下降优化函数

In [6]:
def sgd(params, lr, batch_size):
    # 为了和原书保持一致，这里除以了batch_size，但是应该是不用除的，因为一般用PyTorch计算loss时就默认已经
    # 沿batch维求了平均了。
    for param in params:
        param.data -= lr * param.grad / batch_size # 注意这里更改param时用的param.data

## 1.7 训练模型

In [11]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

In [7]:
num_epochs, lr =2, 100.0

def train_mlp(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  params=None, lr=None, optimizer=None):
    
    for epoch in range(num_epochs):
        
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        
        for X,y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            
            #梯度清0
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
                
            l.backward()
            
            if optimizer is not None:
                optimizer.step()
            else:
                sgd(params, lr, batch_size)
            
            train_l_sum += l.item()
            
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            
            n += y.shape[0]
            
            test_acc = evaluate_accuracy(test_iter, net)
            
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
            % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [12]:
train_mlp(net,train_iter, test_iter, loss, num_epochs, batch_size,
         params=params, lr=lr)

epoch 1, loss 0.0030, train acc 0.713, test acc 0.800
epoch 2, loss 0.0019, train acc 0.822, test acc 0.753


# 2 简洁实现

## 2.1定义模型&初始化参数

In [26]:
import torch.nn as nn
from torch.nn import init

num_inputs, num_outputs, num_hiddens= 784, 10, 256

#展开向量
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x): # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1) 
    
net2 = nn.Sequential()
net2.add_module('FlattenLayer',FlattenLayer())
net2.add_module('linear1',nn.Linear(num_inputs,num_hiddens))
net2.add_module('Relu',nn.ReLU())
net2.add_module('linear2',nn.Linear(num_hiddens,num_outputs))

for params in net2.parameters():
    init.normal_(params, mean=0, std=0.01)

print(net2)

Sequential(
  (FlattenLayer): FlattenLayer()
  (linear1): Linear(in_features=784, out_features=256, bias=True)
  (Relu): ReLU()
  (linear2): Linear(in_features=256, out_features=10, bias=True)
)


## 2.2 读取数据

In [27]:
batch_size = 256
num_workers=0

mnist_train = torchvision.datasets.FashionMNIST(root='~/Desktop/Datasets/FashionMNIST', train=True, download=False, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Desktop/Datasets/FashionMNIST', train=False, download=False, transform=transforms.ToTensor())

train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

## 2.3 损失函数&优化函数

In [28]:
loss = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

## 2.4 训练模型

In [30]:
num_epochs = 2

train_mlp(net2,train_iter, test_iter, loss, num_epochs, batch_size,
         optimizer = optimizer)

epoch 1, loss 0.0090, train acc 0.082, test acc 0.080
epoch 2, loss 0.0090, train acc 0.082, test acc 0.080
