### 多层感知机

#### 使用API

In [3]:
import torch
from torch import nn
from torch.nn import functional as F

# Multilayer perceptron assembled from the built-in container: 20 -> 256 -> 10.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random mini-batch: 2 samples with 20 features each.
X = torch.rand(2, 20)

# The last expression of the cell is what the notebook displays.
net(X)

tensor([[-0.3411, -0.2115, -0.1651, -0.1120, -0.1301, -0.3345,  0.0500,  0.2462,
          0.1398,  0.1693],
        [-0.1254, -0.2322, -0.0678, -0.2044, -0.1391, -0.1996,  0.0428,  0.2386,
          0.0408,  0.1533]], grad_fn=<AddmmBackward0>)

#### 自定义模块

In [10]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        # nn.Module has to be initialised before sub-modules are assigned,
        # otherwise they cannot be registered on this module.
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Run ``X`` through the hidden layer, a ReLU, and the output layer."""
        activated = F.relu(self.hidden(X))
        return self.out(activated)

In [11]:
# Instantiate the custom module and call it like a function on the batch X.
net2 = MLP()
net2(X)

tensor([[-0.1799, -0.2857, -0.0478, -0.0135, -0.1515, -0.1657, -0.0091,  0.0355,
          0.1717, -0.1080],
        [-0.2313, -0.2420, -0.1103, -0.0679, -0.0295, -0.2445,  0.1250,  0.1628,
          0.1478, -0.1971]], grad_fn=<AddmmBackward0>)

### nn.Sequential实现

In [13]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are stored in the order given and
    applied in that same order by ``forward``.
    """

    def __init__(self, *args):
        """Register each positional argument as a child under its index.

        Args:
            *args: the blocks to chain, in execution order.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of ``_modules`` must be strings: they are used as prefixes
            # in state_dict() and as labels in repr(). Using the position also
            # keeps a block that is passed twice as two separate entries.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through every registered block in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X

# Same architecture as before, this time chained by the hand-written container.
net3 = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net3(X)

tensor([[ 0.2651, -0.3284, -0.0281, -0.1864,  0.0945, -0.1045,  0.1616,  0.0243,
         -0.2327, -0.0394],
        [ 0.1316, -0.3607, -0.1718, -0.2215, -0.0665, -0.1224,  0.1562,  0.2074,
         -0.0109,  0.0133]], grad_fn=<AddmmBackward0>)

结论：可以通过`nn.Module`来更灵活地实现`nn.Sequential`。

### 参数管理

In [14]:
import torch
from torch import nn
from torch.nn import functional as F

# Small network used by the parameter-access examples below: 4 -> 8 -> 1.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
# Random mini-batch: 2 samples with 4 features each.
X = torch.rand(2, 4)

net(X)

tensor([[ 0.0098],
        [-0.0797]], grad_fn=<AddmmBackward0>)

#### 访问网络中的weights

In [15]:
# Sequential supports indexing: position 2 is the final Linear(8, 1) layer.
net[2]

Linear(in_features=8, out_features=1, bias=True)

In [17]:
net[2].state_dict() # ordered dict with this layer's two entries: 'weight' and 'bias'

OrderedDict([('weight',
              tensor([[-0.2357, -0.1297, -0.1293,  0.1118,  0.0302, -0.1980, -0.0048,  0.0384]])),
             ('bias', tensor([0.0092]))])

In [21]:
# .weight is the layer's weight parameter; .data shows its underlying tensor values.
net[2].weight.data

tensor([[-0.2357, -0.1297, -0.1293,  0.1118,  0.0302, -0.1980, -0.0048,  0.0384]])

In [24]:
net[2].weight.grad is None # no backward pass (backpropagation) has run yet, so no gradient is stored

True

In [25]:
# Visit every parameter of the first layer in one pass, showing its name and shape.
print(*((name, param.shape) for name, param in net[0].named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


#### 修改内置初始化

In [29]:
def init_normal(m):
    """Re-initialise an ``nn.Linear`` layer in place; ignore any other module.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.01. The bias, when the layer has one, is set to zero.
    The signature (one module in, nothing returned) fits ``Module.apply``.

    Args:
        m: the module to initialise.
    """
    # Exact type match: subclasses of nn.Linear are not touched.
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # nn.Linear(..., bias=False) stores ``bias`` as None, which
        # nn.init.zeros_ cannot handle, so only reset a bias that exists.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

net.apply(init_normal) # apply() calls init_normal on every sub-module of net and on net itself


Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [30]:
# Inspect the first layer's weights after init_normal has been applied.
net[0].weight.data

tensor([[-0.0025, -0.0140, -0.0081, -0.0206],
        [-0.0008,  0.0187, -0.0114,  0.0073],
        [-0.0019, -0.0152,  0.0006, -0.0074],
        [-0.0010, -0.0044,  0.0143, -0.0015],
        [-0.0030,  0.0088,  0.0029, -0.0050],
        [ 0.0047, -0.0048, -0.0128,  0.0058],
        [-0.0192,  0.0038, -0.0171, -0.0080],
        [-0.0069, -0.0043, -0.0043, -0.0042]])

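共享权重<br>
先创建一个Linear层对象，再把同一个对象传入`nn.Sequential`的多个位置；这些位置上的层实际上是同一个层，因此它们的参数是共享的。例如：`shared = nn.Linear(8, 8)`，`net = nn.Sequential(shared, nn.ReLU(), shared)`。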

### 自定义层

#### 没有参数的层

In [37]:
# A custom layer that holds no parameters of its own.
class CenteredLayer(nn.Module):
    """Shift the input so that its overall mean becomes zero."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` minus the mean taken over all of its elements."""
        mean_of_all = X.mean()
        return X - mean_of_all

# The parameter-free layer can be mixed with built-in layers inside Sequential.
net = nn.Sequential(
    nn.Linear(8, 8),
    CenteredLayer(),
    nn.ReLU(),
    nn.Linear(8, 2),
)
X = torch.rand(4, 8)
net(X)

tensor([[-0.0068,  0.2351],
        [-0.0541,  0.2204],
        [ 0.0151,  0.1978],
        [-0.0299,  0.1647]], grad_fn=<AddmmBackward0>)

#### 带参数的层

In [57]:
# A custom layer with its own learnable parameters.
class CenteredLayer(nn.Module):
    """Fully connected layer followed by ReLU, built from raw parameters.

    NOTE(review): this reuses the name of the parameter-free ``CenteredLayer``
    defined earlier in the notebook and shadows it. Despite the name, this
    layer does not center anything.
    """

    def __init__(self, in_units, out_units):
        """Create the weight and bias, both drawn from a standard normal.

        Args:
            in_units: size of the last dimension of the input.
            out_units: size of the last dimension of the output.
        """
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, out_units))
        self.bias = nn.Parameter(torch.randn(out_units,))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)``."""
        # Use the parameters themselves rather than ``.data``: ``.data``
        # detaches them from autograd, so no gradient would ever reach
        # ``weight`` or ``bias`` and the layer could not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)

# Build a layer mapping 5 input units to 3 output units and show its (5, 3) weight matrix.
dense = CenteredLayer(5,3)
dense.weight.data

tensor([[-0.6890,  0.0131, -1.5291],
        [ 1.0957, -3.3585,  1.6728],
        [ 0.2971, -1.2093,  1.3378],
        [ 1.2908, -0.5028,  1.5309],
        [ 0.3560, -1.4394, -0.3915]])

In [58]:
# Forward pass on a random batch of 2 samples with 5 features each.
X = torch.rand(2,5)
dense(X)

tensor([[0.7042, 0.0000, 4.5077],
        [0.0000, 0.0000, 3.2025]])

### 读写文件

In [59]:
# Write a tensor to the file 'x-file' in the current working directory ...
x = torch.arange(3)
torch.save(x, 'x-file')

# ... and read it back; x2 holds the same values as x.
x2 = torch.load('x-file')
x2

tensor([0, 1, 2])

### 加载和保存模型参数

In [61]:
import torch
from torch import nn
from torch.nn import functional as F

class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        # nn.Module has to be initialised before sub-modules are assigned,
        # otherwise they cannot be registered on this module.
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Run ``X`` through the hidden layer, a ReLU, and the output layer."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)

# Random batch of 10 samples with 20 features, fed through a freshly initialised MLP.
X = torch.randn(10,20)
net2 = MLP()
net2(X)

tensor([[ 0.3305, -0.1368, -0.2257, -0.2807,  0.0983,  0.1164, -0.3476,  0.1627,
          0.1175,  0.0816],
        [ 0.2137,  0.0569,  0.0278, -0.2554, -0.3911, -0.0747, -0.1527, -0.1115,
         -0.2113, -0.1539],
        [ 0.1725, -0.3105, -0.2965, -0.0638, -0.2403, -0.5052, -0.0474,  0.1496,
         -0.4131, -0.0839],
        [ 0.3223,  0.1299, -0.4592, -0.0494, -0.0606, -0.4693, -0.0526, -0.0589,
         -0.3011, -0.0018],
        [-0.0859, -0.1273, -0.0971, -0.2160,  0.2010, -0.1927, -0.2981,  0.0493,
          0.0260,  0.2309],
        [-0.0443, -0.1052, -0.3274, -0.1071, -0.0627, -0.0579,  0.0259, -0.0760,
         -0.3873, -0.1852],
        [ 0.0026, -0.0747, -0.1974,  0.1279,  0.1031, -0.1720, -0.0712,  0.1283,
         -0.0313, -0.2518],
        [-0.0978, -0.1201, -0.1585,  0.0291,  0.4118, -0.3020, -0.0346, -0.3169,
         -0.3723, -0.2459],
        [-0.1847, -0.3626, -0.4611,  0.0885,  0.0988, -0.2466,  0.1038,  0.2115,
         -0.2446,  0.3477],
        [ 0.1613, -

In [65]:
# Store the model's parameters (its state_dict) in the file 'mlp_params'.
torch.save(net2.state_dict(),'mlp_params')

In [66]:
# Restore the model: build a fresh MLP, then copy the saved parameters into it.
clone = MLP()
clone.load_state_dict(torch.load('mlp_params'))
# Switch to evaluation mode; eval() returns the module, so the cell displays its structure.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)