In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
class MySequential(nn.Module):
    def __init__(self,*layers):
        super(MySequential, self).__init__()

        for i,layer in enumerate(layers):
            self._modules[str(i)]=layer
    def forward(self, input):
        for block in self._modules.values():
            input=block(input)
        return input

在正向传播函数中执行代码
正向传播中随便改 反向传播自动保留计算图 直接backward就行

In [3]:

class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super(FixedHiddenMLP, self).__init__()
        self.rand_weight = torch.rand((20,20),requires_grad=True)
        self.linear = nn.Linear(20,20)
    def forward(self, x):
        x=self.linear(x)
        x=F.relu(torch.mm(x,self.rand_weight)+1)
        x=self.linear(x)
        while x.abs().sum()>0.00001:
            x/=2
        return x.sum()

In [4]:
f=FixedHiddenMLP()


可以混合sequential 和任何模型 达到自己想要的模型

In [5]:
class NestMlp(nn.Module):
    def __init__(self):
        super(NestMlp, self).__init__()
        self.net=nn.Sequential(nn.Linear(20,20),nn.ReLU(),nn.Linear(20,20),nn.ReLU(),nn.Linear(20,20))
        self.Linear = nn.Linear(20,20)
    def forward(self, x):
        return self.Linear(self.net(x))

In [6]:
chimear=nn.Sequential(NestMlp(),nn.Linear(20,20),nn.ReLU(),nn.Linear(20,20),nn.ReLU(),nn.Linear(20,1))

In [7]:
x=torch.randn(1,20)
print(chimear(x))

tensor([[-0.1008]], grad_fn=<AddmmBackward0>)


# 2、参数管理

In [8]:
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))

In [9]:
x=torch.randn(1,4)
net(x)

tensor([[-0.2945]], grad_fn=<AddmmBackward0>)

In [10]:
##参数访问
net[2]###拿到的是第三个层  就是nn.linear
##然后可以使用state_dict() 来访问参数 state从自动机的角度上来讲就是他的状态
net[2].state_dict()
####可以通过名称访问
net[2].state_dict()['weight']
net.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.3806,  0.4140,  0.2805, -0.0232],
                      [ 0.3593, -0.2246,  0.1993, -0.1817],
                      [ 0.4313,  0.4668,  0.4079,  0.1663],
                      [-0.4818,  0.0698,  0.1350,  0.1613],
                      [ 0.0334, -0.1587, -0.3957,  0.3452],
                      [-0.3400, -0.1573,  0.1614,  0.0756],
                      [-0.1307,  0.2563,  0.0468,  0.2621],
                      [ 0.2672, -0.4869, -0.4331,  0.3235]])),
             ('0.bias',
              tensor([ 0.0514,  0.4571,  0.4940,  0.0357, -0.3324, -0.0348,  0.1599,  0.2477])),
             ('2.weight',
              tensor([[ 0.1187, -0.0597,  0.0026, -0.3163,  0.3186, -0.3284,  0.3375, -0.1357]])),
             ('2.bias', tensor([-0.1913]))])

##可以看到权重weight是一个8*1的矩阵 偏置是一维

In [11]:
type(net[2].bias) 
print(net[2].bias)

Parameter containing:
tensor([-0.1913], requires_grad=True)


Parameter是一个可以优化的参数对象  
打印出来就是parameter里包含一个可以改变的张量  
可以通过.data来访问 因为还有一个梯度 .grad访问梯度

In [12]:
print(net[2].bias.data)
print(net[2].bias.grad) ##因为没有做反向传播

tensor([-0.1913])
None


# 一次性访问所有参数

named_parameters 返回Module.named_parameters对象 和parameters返回Module.parameters对象  一个是参数+名称 一个是参数

In [13]:
net.parameters()

<generator object Module.parameters at 0x0000022C0DF84660>

In [14]:
for n,i in net.named_parameters():
    print(n)
    print(i)

0.weight
Parameter containing:
tensor([[ 0.3806,  0.4140,  0.2805, -0.0232],
        [ 0.3593, -0.2246,  0.1993, -0.1817],
        [ 0.4313,  0.4668,  0.4079,  0.1663],
        [-0.4818,  0.0698,  0.1350,  0.1613],
        [ 0.0334, -0.1587, -0.3957,  0.3452],
        [-0.3400, -0.1573,  0.1614,  0.0756],
        [-0.1307,  0.2563,  0.0468,  0.2621],
        [ 0.2672, -0.4869, -0.4331,  0.3235]], requires_grad=True)
0.bias
Parameter containing:
tensor([ 0.0514,  0.4571,  0.4940,  0.0357, -0.3324, -0.0348,  0.1599,  0.2477],
       requires_grad=True)
2.weight
Parameter containing:
tensor([[ 0.1187, -0.0597,  0.0026, -0.3163,  0.3186, -0.3284,  0.3375, -0.1357]],
       requires_grad=True)
2.bias
Parameter containing:
tensor([-0.1913], requires_grad=True)


In [15]:
for i in net.parameters():
    print(i)

Parameter containing:
tensor([[ 0.3806,  0.4140,  0.2805, -0.0232],
        [ 0.3593, -0.2246,  0.1993, -0.1817],
        [ 0.4313,  0.4668,  0.4079,  0.1663],
        [-0.4818,  0.0698,  0.1350,  0.1613],
        [ 0.0334, -0.1587, -0.3957,  0.3452],
        [-0.3400, -0.1573,  0.1614,  0.0756],
        [-0.1307,  0.2563,  0.0468,  0.2621],
        [ 0.2672, -0.4869, -0.4331,  0.3235]], requires_grad=True)
Parameter containing:
tensor([ 0.0514,  0.4571,  0.4940,  0.0357, -0.3324, -0.0348,  0.1599,  0.2477],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1187, -0.0597,  0.0026, -0.3163,  0.3186, -0.3284,  0.3375, -0.1357]],
       requires_grad=True)
Parameter containing:
tensor([-0.1913], requires_grad=True)


In [16]:
###有了名字之后可以通过名字来访问对应的参数
net.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.3806,  0.4140,  0.2805, -0.0232],
                      [ 0.3593, -0.2246,  0.1993, -0.1817],
                      [ 0.4313,  0.4668,  0.4079,  0.1663],
                      [-0.4818,  0.0698,  0.1350,  0.1613],
                      [ 0.0334, -0.1587, -0.3957,  0.3452],
                      [-0.3400, -0.1573,  0.1614,  0.0756],
                      [-0.1307,  0.2563,  0.0468,  0.2621],
                      [ 0.2672, -0.4869, -0.4331,  0.3235]])),
             ('0.bias',
              tensor([ 0.0514,  0.4571,  0.4940,  0.0357, -0.3324, -0.0348,  0.1599,  0.2477])),
             ('2.weight',
              tensor([[ 0.1187, -0.0597,  0.0026, -0.3163,  0.3186, -0.3284,  0.3375, -0.1357]])),
             ('2.bias', tensor([-0.1913]))])

In [21]:
for i in net.parameters():
    print(i)

Parameter containing:
tensor([[ 0.3806,  0.4140,  0.2805, -0.0232],
        [ 0.3593, -0.2246,  0.1993, -0.1817],
        [ 0.4313,  0.4668,  0.4079,  0.1663],
        [-0.4818,  0.0698,  0.1350,  0.1613],
        [ 0.0334, -0.1587, -0.3957,  0.3452],
        [-0.3400, -0.1573,  0.1614,  0.0756],
        [-0.1307,  0.2563,  0.0468,  0.2621],
        [ 0.2672, -0.4869, -0.4331,  0.3235]], requires_grad=True)
Parameter containing:
tensor([ 0.0514,  0.4571,  0.4940,  0.0357, -0.3324, -0.0348,  0.1599,  0.2477],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1187, -0.0597,  0.0026, -0.3163,  0.3186, -0.3284,  0.3375, -0.1357]],
       requires_grad=True)
Parameter containing:
tensor([-0.1913], requires_grad=True)


In [22]:
for i in net.named_parameters():
    print(i)

('0.weight', Parameter containing:
tensor([[ 0.3806,  0.4140,  0.2805, -0.0232],
        [ 0.3593, -0.2246,  0.1993, -0.1817],
        [ 0.4313,  0.4668,  0.4079,  0.1663],
        [-0.4818,  0.0698,  0.1350,  0.1613],
        [ 0.0334, -0.1587, -0.3957,  0.3452],
        [-0.3400, -0.1573,  0.1614,  0.0756],
        [-0.1307,  0.2563,  0.0468,  0.2621],
        [ 0.2672, -0.4869, -0.4331,  0.3235]], requires_grad=True))
('0.bias', Parameter containing:
tensor([ 0.0514,  0.4571,  0.4940,  0.0357, -0.3324, -0.0348,  0.1599,  0.2477],
       requires_grad=True))
('2.weight', Parameter containing:
tensor([[ 0.1187, -0.0597,  0.0026, -0.3163,  0.3186, -0.3284,  0.3375, -0.1357]],
       requires_grad=True))
('2.bias', Parameter containing:
tensor([-0.1913], requires_grad=True))


# 从嵌套块收集参数

In [31]:
def block1():
    return nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,4))

def block2():
    net=nn.Sequential()
    for i in range(2):
        net.add_module(f'block{i}',block1())
    return net
rgnet=nn.Sequential(nn.Linear(4,4),nn.ReLU(),block2())

### nn.sequential 嵌套block2 然后再嵌套block1

查看网络如何组织

In [32]:
rgnet[2][0][0].weight

Parameter containing:
tensor([[-0.1831,  0.3135, -0.0661, -0.2556],
        [ 0.1620,  0.1871, -0.4294,  0.4322],
        [ 0.2671,  0.4584, -0.3968,  0.1990],
        [-0.0042,  0.4970, -0.0663,  0.2106],
        [ 0.0571, -0.4672, -0.3439, -0.2352],
        [-0.3504, -0.2263, -0.3864,  0.4536],
        [-0.4980,  0.4657,  0.4735, -0.4893],
        [ 0.4946,  0.2168, -0.0788, -0.0902]], requires_grad=True)

# 初始化参数

内置初始化  
torch.nn.init 此模块所有函数都用于初始化神经网络参数，且默认在nograd模式下运行，不会再autograd考虑在内  
这个系列的函数都是一样 用数据填充输入的张量

In [33]:
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight,mean=0,std=0.01)
        nn.init.zeros_(m.bias)
def init_constant(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight,1)
        nn.init.zeros_(m.bias)

In [34]:
##利用apply 可以递归到每一个模型
net.apply(init_normal)
net[0].weight.data[0],net[0].bias.data[0]

(tensor([-0.0230, -0.0031,  0.0015, -0.0112]), tensor(0.))

对不同块应用不同的初始化方法

In [35]:
def xavier_init(m):
    if type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight)
def init_42(m):
    if isinstance(m, nn.Linear):
        nn.init.constant_(m.weight, 42)
        
net[0].apply(xavier_init)
net[2].apply(init_42)
print(net[0].state_dict()['weight'])
print(net[2].state_dict()['weight'])

tensor([[ 0.6958, -0.3158,  0.1149,  0.0274],
        [-0.6071, -0.3464,  0.0849,  0.4299],
        [-0.0868, -0.1794, -0.3062,  0.0750],
        [-0.2680, -0.2814,  0.0599,  0.6728],
        [ 0.8363, -0.2923,  0.2463,  0.3940],
        [-0.0057,  0.0208,  0.3185,  0.2744],
        [-0.0830, -0.2051, -0.2553, -0.6667],
        [ 0.3550,  0.4911,  0.5734,  0.0807]])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


自定义初始化  
定义一个初始化函数做想做的事情

In [36]:
def my_init(m):
    if type(m) == nn.Linear:
        print("init",
              *[(name,param.shape) for name,param in m.named_parameters()]
              )
        nn.init.uniform_(m.weight, -0.1, 0.1)
        m.weight.data *=m.weight.data.abs()>=5


In [39]:
net.apply(my_init)

init ('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
init ('weight', torch.Size([1, 8])) ('bias', torch.Size([1]))


Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

也可以直接操作修改参数

In [50]:
net[0].state_dict()['weight'][0][0]=42
net[0].state_dict()['weight']

tensor([[42., -0.,  0., -0.],
        [-0., -0., -0.,  0.],
        [-0., -0.,  0., -0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0., -0.,  0.],
        [ 0., -0.,  0., -0.],
        [ 0.,  0.,  0., -0.],
        [-0.,  0.,  0.,  0.]])

In [53]:
net[0].state_dict()['weight']+=1
net[0].state_dict()['weight']

tensor([[44.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.],
        [ 2.,  2.,  2.,  2.]])

# 参数绑定  
### 目的是在不同的层之间共享权重，要变一起变，完全一样

In [54]:
##首先构造一个需要share的权重
shared=nn.Linear(8,8)
net=nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,nn.ReLU(),nn.Linear(8,1))

In [57]:
x=torch.randn((1,4))

In [65]:
net[2].weight.data==net[4].weight.data

tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])