# 模型参数的访问和初始化

In [1]:
import torch 
import torch.nn as nn

In [2]:
# Small MLP used by the cells below: Linear(5 -> 4), ReLU, Linear(4 -> 1).
layers = [
    nn.Linear(5, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
]
net = nn.Sequential(*layers)

print(net)

# Push a random (5, 5) input through the network and reduce the
# outputs to a single scalar.
x = torch.rand(5, 5)
y = net(x).sum()

Sequential(
  (0): Linear(in_features=5, out_features=4, bias=True)
  (1): ReLU()
  (2): Linear(in_features=4, out_features=1, bias=True)
)


## 访问模型参数

In [3]:
# Method 1: Module.parameters() yields the parameter tensors only (no names).
print(net.parameters())  # shows the generator object itself, not its contents
for param in net.parameters():
    print(f"{param.size()} {type(param)}")

<generator object Module.parameters at 0x7f2451558570>
torch.Size([4, 5]) <class 'torch.nn.parameter.Parameter'>
torch.Size([4]) <class 'torch.nn.parameter.Parameter'>
torch.Size([1, 4]) <class 'torch.nn.parameter.Parameter'>
torch.Size([1]) <class 'torch.nn.parameter.Parameter'>


In [4]:
# Method 2: Module.named_parameters() yields (name, parameter) pairs.
print(net.named_parameters())  # shows the generator object itself, not its contents
for name, param in net.named_parameters():
    print(f"{name} {param.size()} {type(param)}")

<generator object Module.named_parameters at 0x7f23e6e91e60>
0.weight torch.Size([4, 5]) <class 'torch.nn.parameter.Parameter'>
0.bias torch.Size([4]) <class 'torch.nn.parameter.Parameter'>
2.weight torch.Size([1, 4]) <class 'torch.nn.parameter.Parameter'>
2.bias torch.Size([1]) <class 'torch.nn.parameter.Parameter'>


In [5]:
# Access the parameters of a single layer by indexing into the Sequential.
first_layer = net[0]
for name, param in first_layer.named_parameters():
    print(f"{name} {param.size()} {type(param)}")

weight torch.Size([4, 5]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([4]) <class 'torch.nn.parameter.Parameter'>


**Parameter 是 Tensor 的子类。如果一个 Tensor 是 Parameter，并且被赋值为 Module 的属性，那么它会自动被添加到模型的参数列表里。**

In [8]:
class DemoModel(nn.Module):
    """Toy module contrasting an ``nn.Parameter`` attribute with a plain tensor attribute."""

    def __init__(self, **kwargs):
        """Create the two (4, 3) attributes; ``kwargs`` are forwarded to ``nn.Module``."""
        super().__init__(**kwargs)
        # Wrapped in nn.Parameter: listed by named_parameters().
        registered = torch.rand(4, 3)
        self.weight1 = nn.Parameter(registered)
        # Deliberately a plain tensor: NOT listed by named_parameters().
        self.weight2 = torch.rand(4, 3)

    def forward(self, x):
        """Placeholder forward pass; performs no computation and returns None."""
        return None

In [9]:
# List what the demo module exposes through named_parameters().
demo_net = DemoModel()
for name, param in demo_net.named_parameters():
    print(f"{name} {param.size()} {type(param)}")

weight1 torch.Size([4, 3]) <class 'torch.nn.parameter.Parameter'>


weight1 是 Parameter，因此在参数列表中；而 weight2 只是普通的 Tensor，因此不在参数列表中。

## 初始化模型参数

In [13]:
# Re-draw every weight tensor from a normal distribution (mean 0, std 0.01).
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue  # only parameters whose name contains 'weight' are touched
    nn.init.normal_(param, mean=0, std=0.01)
    print(name, param.data)

0.weight tensor([[-0.0107,  0.0033, -0.0034, -0.0149,  0.0026],
        [ 0.0149, -0.0019,  0.0021, -0.0076, -0.0184],
        [ 0.0094,  0.0097, -0.0005,  0.0002,  0.0045],
        [ 0.0084,  0.0242,  0.0030, -0.0017,  0.0015]])
0.weight tensor([[-0.0043, -0.0139,  0.0144,  0.0030,  0.0138],
        [-0.0049,  0.0087,  0.0054,  0.0007, -0.0068],
        [-0.0096,  0.0035,  0.0090,  0.0014, -0.0086],
        [-0.0034, -0.0021,  0.0100, -0.0106,  0.0163]])
2.weight tensor([[ 0.0050,  0.0016, -0.0031, -0.0079]])
2.weight tensor([[-0.0042,  0.0025,  0.0025,  0.0026]])


In [16]:
# Set every bias tensor to the constant 0.
for name, param in net.named_parameters():
    if 'bias' not in name:
        continue  # only parameters whose name contains 'bias' are touched
    nn.init.constant_(param, val=0.)
    print(name, param.data)

0.bias tensor([0., 0., 0., 0.])
0.bias tensor([0., 0., 0., 0.])
2.bias tensor([0.])
2.bias tensor([0.])


## 自定义参数初始化方法

In [17]:
# method 1 with no_grad
def init_weight_(tensor, mean=1.0, std=0.01):
    """Overwrite ``tensor`` in place with samples from a normal distribution.

    The in-place draw runs inside ``torch.no_grad()`` so that it is not
    recorded by autograd.

    Args:
        tensor: Tensor (or ``nn.Parameter``) to fill in place.
        mean: Mean of the normal distribution. Defaults to 1.0, the value
            this function previously hard-coded.
        std: Standard deviation of the normal distribution. Defaults to
            0.01, the value this function previously hard-coded.

    Returns:
        None. ``tensor`` is modified in place.
    """
    with torch.no_grad():
        tensor.normal_(mean=mean, std=std)

In [18]:
# Apply the custom initializer to every weight tensor and show the result.
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue  # only parameters whose name contains 'weight' are touched
    init_weight_(param)
    print(name, param.data)

0.weight tensor([[0.9975, 0.9960, 0.9972, 1.0055, 0.9755],
        [0.9944, 1.0065, 1.0047, 1.0051, 0.9996],
        [0.9964, 1.0207, 0.9852, 1.0082, 1.0099],
        [0.9987, 0.9790, 0.9945, 0.9971, 1.0007]])
2.weight tensor([[0.9838, 1.0018, 0.9921, 1.0024]])


In [27]:
# method 2 with tensor.data
def init_bias__(tensor):
    """Add 1 to every element of ``tensor`` in place, writing through ``tensor.data``.

    The existing values are incremented rather than overwritten, so calling
    this repeatedly keeps accumulating. Returns None.
    """
    tensor.data.add_(1.)

In [28]:
# Apply the custom bias update to every bias tensor and show the result.
for name, param in net.named_parameters():
    if 'bias' not in name:
        continue  # only parameters whose name contains 'bias' are touched
    init_bias__(param)
    print(name, param.data)

0.bias tensor([1., 1., 1., 1.])
2.bias tensor([1.])


## 共享模型参数