# 参数管理

## example: 单隐藏层 MLP(单隐藏层多层感知机)

In [2]:
import torch as tor
from torch import nn as tor_nn


In [4]:
# Single-hidden-layer MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
net = tor_nn.Sequential(
    tor_nn.Linear(4, 8),
    tor_nn.ReLU(),
    tor_nn.Linear(8, 1),
)
# Random batch of 2 samples with 4 features each.
X = tor.rand(2, 4)
net(X)

tensor([[0.0060],
        [0.0993]], grad_fn=<AddmmBackward0>)

Sequential 可以简单地理解为 Python 中的列表(list)

因此我们可以通过索引去访问这个列表中的每一层，就像我们使用索引去访问数组一样

In [8]:
# Dump the state_dict of layers 0, 1 and 2, one per line.
# The ReLU at index 1 holds no parameters, so its state_dict prints empty.
print(*(net[idx].state_dict() for idx in range(3)), sep="\n")

OrderedDict([('weight', tensor([[-0.1457,  0.4638, -0.1093,  0.1620],
        [ 0.3534, -0.0251, -0.2591,  0.4670],
        [-0.4415, -0.0902,  0.0551, -0.3649],
        [ 0.2536, -0.3261,  0.4174, -0.2729],
        [-0.2054, -0.3219, -0.1962, -0.3010],
        [-0.2352, -0.0879,  0.3241,  0.1783],
        [ 0.2218, -0.3994, -0.3758,  0.3522],
        [ 0.0490, -0.0114, -0.0968,  0.3093]])), ('bias', tensor([-0.4103, -0.0907, -0.2448, -0.4558, -0.0382, -0.0858, -0.3601,  0.2401]))])
OrderedDict()
OrderedDict([('weight', tensor([[ 0.0421, -0.1938,  0.3524,  0.2961, -0.1071,  0.1066,  0.1025, -0.2362]])), ('bias', tensor([0.2057]))])


我们可以看到索引是从0开始计数的，一直到2，分别对应:
- net[0]: 第一层 Linear(4, 8)
- net[1]: 第二层 ReLU(没有任何参数，所以它的 state_dict 是空的)
- net[2]: 第三层 Linear(8, 1)

In [10]:
# Look at the output layer's bias three ways, one per printed line.
print(
    type(net[2].bias),  # the object's class
    net[2].bias,        # full Parameter repr (values plus requires_grad)
    net[2].bias.data,   # just the underlying values
    sep="\n",
)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.2057], requires_grad=True)
tensor([0.2057])


感觉就是一个C的链表

In [13]:
# Show the output layer's weight, then its gradient.
# The gradient is None here because no backward pass has been run yet.
print(net[2].weight, net[2].weight.grad, sep="\n")

Parameter containing:
tensor([[ 0.0421, -0.1938,  0.3524,  0.2961, -0.1071,  0.1066,  0.1025, -0.2362]],
       requires_grad=True)
None


### 一次性访问所有参数

In [14]:
# Parameters of one layer only: names are local to that layer ("weight", "bias").
print("单个第0层参数")
print(*((key, tensor.shape) for key, tensor in net[0].named_parameters()))
# Parameters of the whole network: names carry the layer index as a prefix.
print("多层所有参数")
print(*((key, tensor.shape) for key, tensor in net.named_parameters()))


单个第0层参数
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
多层所有参数
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


名字作为索引访问(感觉像Lua)

In [16]:
# state_dict() is keyed by the qualified parameter name, so "2.weight" is the
# weight of the layer at index 2.
net.state_dict()["2.weight"].data

tensor([[ 0.0421, -0.1938,  0.3524,  0.2961, -0.1071,  0.1066,  0.1025, -0.2362]])

### 嵌套块收集参数

In [17]:
def block1():
    """Build one 4 -> 8 -> 4 block with a ReLU after each linear layer.

    Returns:
        A ``tor_nn.Sequential`` holding Linear(4, 8), ReLU, Linear(8, 4), ReLU,
        registered under the default positional names "0" to "3".
    """
    layers = [
        tor_nn.Linear(4, 8),
        tor_nn.ReLU(),
        tor_nn.Linear(8, 4),
        tor_nn.ReLU(),
    ]
    return tor_nn.Sequential(*layers)

def block2(num_blocks=4):
    """Chain several ``block1`` sub-networks inside one named ``Sequential``.

    Args:
        num_blocks: How many ``block1`` instances to stack. Defaults to 4,
            the depth that used to be hard-coded, so ``block2()`` behaves
            exactly as before.

    Returns:
        A ``tor_nn.Sequential`` whose children are registered under the
        names ``block0`` ... ``block{num_blocks - 1}``.
    """
    # Start from an empty container; the local is deliberately not called
    # ``net`` so it does not shadow the notebook-level ``net``.
    stacked = tor_nn.Sequential()
    for i in range(num_blocks):
        # add_module registers the child under an explicit name instead of
        # the default positional index ("0", "1", ...).
        stacked.add_module(f'block{i}', block1())
    return stacked

In [18]:
# Nest the stacked blocks inside an outer Sequential and finish with a
# 4 -> 1 linear head, then run the same input batch through it.
rgnet = tor_nn.Sequential(
    block2(),
    tor_nn.Linear(4, 1),
)
rgnet(X)

tensor([[-0.0642],
        [-0.0642]], grad_fn=<AddmmBackward0>)

展示这个网络的完整结构

In [19]:
# Print the module tree: each nested child is listed under its registered name.
print(rgnet)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
