## Torch参数管理

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# A small MLP: 4 input features -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(in_features=4, out_features=8),
    nn.ReLU(),
    nn.Linear(in_features=8, out_features=1),
)
# A random batch of 2 samples with 4 features each.
X = torch.randn(2, 4)
# The bare last expression makes the notebook display the forward-pass result.
net(X)

  from .autonotebook import tqdm as notebook_tqdm


tensor([[0.2496],
        [0.1374]], grad_fn=<AddmmBackward0>)

### 参数访问

In [8]:
# Print every layer followed by its state_dict; the two are separated by
# "*****" on a single output line.
for layer in net:
    print(layer, layer.state_dict(), sep="*****")

Linear(in_features=4, out_features=8, bias=True)*****OrderedDict([('weight', tensor([[-0.2865,  0.1825,  0.4246,  0.0404],
        [-0.1624,  0.2773,  0.3223,  0.4330],
        [ 0.2916,  0.4496, -0.1923, -0.1654],
        [ 0.1439,  0.4049,  0.1987,  0.4646],
        [ 0.1551,  0.0485, -0.0547,  0.3895],
        [-0.0592, -0.2092,  0.3625,  0.4155],
        [ 0.4247, -0.4286, -0.0481,  0.0708],
        [ 0.4779, -0.1641, -0.4195,  0.2046]])), ('bias', tensor([-0.2723, -0.0415, -0.0759, -0.3348,  0.3937, -0.4272,  0.2256, -0.2450]))])
ReLU()*****OrderedDict()
Linear(in_features=8, out_features=1, bias=True)*****OrderedDict([('weight', tensor([[ 0.0411, -0.2077,  0.3242, -0.1308, -0.0301, -0.1770,  0.2029,  0.0360]])), ('bias', tensor([0.1242]))])


#### 获取单个参数

In [10]:
# Show the last layer's bias first as a Parameter, then as its underlying
# tensor (.data), one per line.
print(net[2].bias, net[2].bias.data, sep="\n")


Parameter containing:
tensor([0.1242], requires_grad=True)
tensor([0.1242])


#### 遍历所有的参数

In [14]:
# List (name, shape) pairs: first for the first layer only, then for the
# whole network (whose parameter names are prefixed with the layer index).
for module in (net[0], net):
    print(*((name, param.shape) for name, param in module.named_parameters()))

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


### 从嵌套块收集参数

In [25]:
from icecream import ic


def block1():
    """Build one MLP block: Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU.

    Input and output widths are both 4, so several of these blocks can be
    chained one after another.
    """
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)


def block2(n):
    """Stack ``n`` freshly built ``block1()`` modules in one ``nn.Sequential``.

    The children are registered under the names ``block0`` ... ``block{n-1}``.
    """
    stacked = nn.Sequential()
    for idx in range(n):
        # This is where the nesting happens: each child is itself a Sequential.
        stacked.add_module(f"block{idx}", block1())
    return stacked


# Nested network: a container of four block1() modules followed by a final
# Linear(4 -> 1) layer.
rgnet = nn.Sequential(block2(4), nn.Linear(4, 1))
# Drill down one nesting level at a time; ic() prints each expression
# together with its value.
# The whole network.
ic(rgnet)
# The block2 container holding block0..block3.
ic(rgnet[0])
# The first nested block (registered as "block0").
ic(rgnet[0][0])
# That block's first layer, Linear(4 -> 8).
ic(rgnet[0][0][0])
# The bias tensor of that layer.
ic(rgnet[0][0][0].bias.data)

ic| rgnet: Sequential(
             (0): Sequential(
               (block0): Sequential(
                 (0): Linear(in_features=4, out_features=8, bias=True)
                 (1): ReLU()
                 (2): Linear(in_features=8, out_features=4, bias=True)
                 (3): ReLU()
               )
               (block1): Sequential(
                 (0): Linear(in_features=4, out_features=8, bias=True)
                 (1): ReLU()
                 (2): Linear(in_features=8, out_features=4, bias=True)
                 (3): ReLU()
               )
               (block2): Sequential(
                 (0): Linear(in_features=4, out_features=8, bias=True)
                 (1): ReLU()
                 (2): Linear(in_features=8, out_features=4, bias=True)
                 (3): ReLU()
               )
               (block3): Sequential(
                 (0): Linear(in_features=4, out_features=8, bias=True)
                 (1): ReLU()
                 (2): Linear(in_features=8, out_

tensor([ 0.3426,  0.0028,  0.2502,  0.0514, -0.2991,  0.1288,  0.0073, -0.1657])

### 参数初始化
PyTorch 默认对参数进行随机初始化，但也允许我们自定义初始化方法；`nn.init` 模块中提供了多种内置的初始化方法。

#### 内置初始化

In [27]:
def init_normal(module):
    """
    Initialise a linear layer in place: Gaussian weights (mean 0, std 0.01)
    and a zero bias.

    Intended to be passed to ``nn.Module.apply``, which calls it on every
    submodule; anything that is not an ``nn.Linear`` is left untouched.

    :param module: the module to (possibly) initialise.
    :return: None
    """
    # Match on nn.Linear with isinstance: an exact-type comparison against the
    # nn.Module base class never matches a concrete layer, so no parameter
    # would ever be initialised.
    if isinstance(module, nn.Linear):
        nn.init.normal_(tensor=module.weight, mean=0, std=.01)
        # bias is None when the layer was created with bias=False.
        if module.bias is not None:
            nn.init.zeros_(tensor=module.bias)

def init_const(module):
    """
    Initialise a linear layer in place: every weight set to 1 and the bias
    set to 0.

    Intended to be passed to ``nn.Module.apply``, which calls it on every
    submodule; anything that is not an ``nn.Linear`` is left untouched.

    :param module: the module to (possibly) initialise.
    :return: None
    """
    # Match on nn.Linear with isinstance: an exact-type comparison against the
    # nn.Module base class never matches a concrete layer.
    if isinstance(module, nn.Linear):
        # constant_ is the in-place initialiser; the un-suffixed
        # nn.init.constant is a deprecated alias.
        nn.init.constant_(module.weight, 1)
        # bias is None when the layer was created with bias=False.
        if module.bias is not None:
            nn.init.zeros_(tensor=module.bias)

# apply() calls init_normal on every submodule of rgnet and returns rgnet
# itself, which the notebook then displays as the cell output.
rgnet.apply(init_normal)
# To inspect the result, index down to a concrete layer first, e.g.
# rgnet[0][0][0].weight.data -- the nn.Sequential container itself has no
# .weight attribute.

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)