In [13]:
import torch
from torch import nn
from torch.nn import init
import numpy as np 

# Fix the global RNG seed so the randomly initialised layer weights and the
# random input below are identical on every Restart & Run All.
torch.manual_seed(0)

# Small MLP used by the following cells: 4 inputs -> 3 hidden units (ReLU) -> 1 output.
net = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1))

print(net)
# Random batch of 2 samples; the output is summed to a scalar so that
# backward() can be called on it without extra arguments.
x = torch.rand(2, 4)
y = net(x).sum()

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


# 访问模型参数

可以通过 Module 类的以下两个方法来访问所有参数（两者均以迭代器的形式返回）：

* `parameters()`：只返回参数本身
* `named_parameters()`：同时返回参数名和参数

In [14]:
# Both accessors return generators rather than lists.
print(type(net.named_parameters()), type(net.parameters()), sep='\n')

# named_parameters() yields (name, parameter) pairs ...
for name, param in net.named_parameters():
    print(name, param)

# Visual divider between the two listings: the same line printed four times.
print('\n'.join(['**************************************'] * 4))

# ... while parameters() yields the parameters only.
for param in net.parameters():
    print(param)

<class 'generator'>
<class 'generator'>
0.weight Parameter containing:
tensor([[ 0.1741, -0.1956, -0.2257,  0.0487],
        [-0.2008,  0.3406, -0.3661,  0.1318],
        [ 0.0457,  0.2257,  0.3323, -0.2242]], requires_grad=True)
0.bias Parameter containing:
tensor([-0.1013, -0.4943,  0.3208], requires_grad=True)
2.weight Parameter containing:
tensor([[-0.1511,  0.2940, -0.1841]], requires_grad=True)
2.bias Parameter containing:
tensor([0.2517], requires_grad=True)
**************************************
**************************************
**************************************
**************************************
Parameter containing:
tensor([[ 0.1741, -0.1956, -0.2257,  0.0487],
        [-0.2008,  0.3406, -0.3661,  0.1318],
        [ 0.0457,  0.2257,  0.3323, -0.2242]], requires_grad=True)
Parameter containing:
tensor([-0.1013, -0.4943,  0.3208], requires_grad=True)
Parameter containing:
tensor([[-0.1511,  0.2940, -0.1841]], requires_grad=True)
Parameter containing:
tensor([0.2517], requires_grad=True)

可以看出返回的名字自动加上了层数的索引作为前缀。我们再来访问net中单层的参数。

In [15]:
# Index into the Sequential to inspect a single layer: print each
# parameter's name, shape and Python type.
for name, param in net[0].named_parameters():
    print(name, param.size(), type(param))

weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>


返回的 param 类型为 `torch.nn.parameter.Parameter`，它其实是 `torch.Tensor` 的子类。和普通 Tensor 不同的是：**如果一个 Tensor 是 Parameter，那么它会自动被添加到模型的参数列表中。**

In [16]:
class MyModel(nn.Module):
    """Toy module contrasting an nn.Parameter with a plain tensor attribute."""

    def __init__(self):
        super(MyModel, self).__init__()
        # Wrapped in nn.Parameter, so the module registers it as a parameter.
        self.weight1 = nn.Parameter(torch.rand(20, 20))
        # Plain tensor built from a NumPy array: kept as an ordinary
        # attribute and not registered as a parameter.
        self.weight2 = torch.tensor(np.random.rand(20, 20))

    def forward(self):
        """Intentionally empty placeholder; returns None."""
        return None


In [17]:
# Build the module and print the name of every registered parameter.
n = MyModel()
for name, _param in n.named_parameters():
    print(name)

weight1


上述代码中weight1在参数列表中，但是weight2不在参数列表中

parameter是tensor，即tensor拥有的属性它都有，比如可以根据data访问参数值，用grad访问参数梯度

In [18]:
# Take the first parameter reported by layer 0.
weight0 = next(net[0].parameters())
print(weight0)
# Gradient before backpropagation.
print(weight0.grad)
y.backward()
# Gradient after backpropagation.
print(weight0.grad)

Parameter containing:
tensor([[ 0.1741, -0.1956, -0.2257,  0.0487],
        [-0.2008,  0.3406, -0.3661,  0.1318],
        [ 0.0457,  0.2257,  0.3323, -0.2242]], requires_grad=True)
None
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [-0.2413, -0.1270, -0.1922, -0.1798]])


# 初始化模型参数

pytorch中nn.Module模块参数采用了较为合理的初始化策略

但是我们经常需要使用其他方法来初始化权重。PyTorch 的 **`init` 模块**（`torch.nn.init`）中提供了多种预设的初始化方法。

In [19]:
# Re-draw every parameter whose name contains 'weight' from a normal
# distribution with mean 0 and std 0.01; other parameters are skipped.
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue
    init.normal_(param, mean=0, std=0.01)
    print(name, param)

0.weight Parameter containing:
tensor([[ 0.0070,  0.0085,  0.0091, -0.0145],
        [ 0.0023, -0.0060, -0.0078, -0.0120],
        [ 0.0153,  0.0084,  0.0156, -0.0154]], requires_grad=True)
2.weight Parameter containing:
tensor([[-0.0080, -0.0148,  0.0040]], requires_grad=True)


下面使用常数来初始化偏置（bias）参数

In [20]:
# Set every parameter whose name contains 'bias' to the constant 0;
# other parameters are skipped.
for name, param in net.named_parameters():
    if 'bias' not in name:
        continue
    init.constant_(param, 0)
    print(name, param)

0.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
2.bias Parameter containing:
tensor([0.], requires_grad=True)


# 自定义初始化方法

自定义初始化方法的要点：原地（in-place）修改 Tensor 的值，同时不记录梯度。

In [21]:
def initWeight(tensor, low=-10, high=10, threshold=5):
    """Initialise ``tensor`` in place with a thresholded uniform distribution.

    Every element is drawn from U(low, high); elements whose absolute value
    is below ``threshold`` are then set to zero. The update runs under
    ``torch.no_grad()`` so autograd does not record it.

    Args:
        tensor: tensor (or nn.Parameter) to overwrite in place.
        low: lower bound of the uniform distribution (default -10).
        high: upper bound of the uniform distribution (default 10).
        threshold: magnitudes strictly below this value are zeroed (default 5).

    Returns:
        The same ``tensor`` object, mirroring the ``torch.nn.init`` functions.
    """
    with torch.no_grad():
        tensor.uniform_(low, high)
        # masked_fill_ writes an exact +0.0; multiplying by a 0/1 mask would
        # instead turn small negative draws into -0.0.
        tensor.masked_fill_(tensor.abs() < threshold, 0.0)
    return tensor
# Apply the custom initialiser to every parameter whose name contains
# 'weight' and print the resulting values; other parameters are skipped.
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue
    initWeight(param)
    print(name, param.data)

0.weight tensor([[-9.5132, -9.1935,  0.0000,  0.0000],
        [ 6.2710,  9.5009, -6.4043, -9.1866],
        [ 7.6908, -9.0243, -9.0246,  5.0344]])
2.weight tensor([[-9.6534, -8.5862, -0.0000]])


还可以改变这些参数的data来改写模型参数值同时不影响梯度

# 共享模型参数

有些情况下，我们希望在各个层之间共享参数

In [22]:
# Put the very same Linear instance into the container twice so that both
# positions use one and the same weight.
linear = nn.Linear(in_features=1, out_features=1, bias=False)
net = nn.Sequential(linear, linear)
print(net)

# Fill every parameter the container reports with the constant 3.
for name, param in net.named_parameters():
    init.constant_(param, 3)
    print(name, param)

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=False)
  (1): Linear(in_features=1, out_features=1, bias=False)
)
0.weight Parameter containing:
tensor([[3.]], requires_grad=True)


在内存中，这两个线性层其实是一个对象

In [23]:
# Identity checks: are both positions the same module object, and do they
# expose the same weight object?
print(net[0] is net[1])
print(net[0].weight is net[1].weight)

True
True


因为模型参数里包含了梯度，在反向传播时，这些共享参数的梯度是累加的。这里 $y = w \cdot (w \cdot x)$，所以 $\partial y / \partial w = 2wx = 2 \times 3 \times 1 = 6$。

In [24]:
# Feed a 1x1 tensor of ones through the model and backpropagate.
x = torch.ones((1, 1))
y = net(x)
y.backward()
# Show the gradient stored on the weight at each position.
print(net[0].weight.grad, net[1].weight.grad, sep='\n')

tensor([[6.]])
tensor([[6.]])
