#  模型构造

In [2]:
import torch
from torch import nn
from torch.nn import functional as F

# Two-layer MLP: 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# X has shape (2, 20): 2 is the batch size, 20 is the input dimension.
X = torch.rand(size=(2, 20))
print(X)
net(X)

tensor([[0.9477, 0.8988, 0.4457, 0.1174, 0.4697, 0.6876, 0.6002, 0.3934, 0.5925,
         0.9028, 0.0741, 0.8078, 0.0614, 0.9276, 0.7920, 0.4322, 0.1043, 0.8559,
         0.3538, 0.3132],
        [0.4184, 0.8421, 0.1076, 0.5265, 0.3111, 0.8167, 0.1754, 0.7825, 0.5511,
         0.5886, 0.8964, 0.7332, 0.5342, 0.8900, 0.0879, 0.9842, 0.8324, 0.0706,
         0.4838, 0.1743]])


tensor([[-0.0851, -0.1256,  0.3145, -0.1447, -0.1538,  0.0456, -0.1097,  0.0040,
         -0.1651,  0.0274],
        [-0.0174, -0.2464,  0.1639, -0.2154, -0.0927, -0.0178, -0.1342, -0.0793,
         -0.2987,  0.1788]], grad_fn=<AddmmBackward>)

In [3]:
# 自定义

class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Apply hidden layer, ReLU, then the output layer to the batch X."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)

In [6]:
# Instantiate the custom MLP and run a forward pass on X.
net = MLP()
net(X)

tensor([[ 0.2143,  0.1016,  0.0156,  0.0676, -0.1374, -0.0631, -0.0134, -0.3126,
         -0.1478, -0.3857],
        [ 0.1569,  0.0802, -0.0498,  0.1532, -0.0878,  0.0331, -0.0662, -0.1283,
         -0.1233, -0.2031]], grad_fn=<AddmmBackward>)

In [9]:
# A freshly created Linear(20, 256) layer maps the (2, 20) batch X to shape (2, 256).
nn.Linear(20, 256)(X).shape

torch.Size([2, 256])

In [10]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: the child blocks, applied to the input in the order given.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Children must be registered under *string* names: nn.Module
            # builds parameter names, state_dict keys and its repr by
            # concatenating these keys. Keying by the module object itself
            # makes parameters()/state_dict()/repr() raise TypeError, and a
            # block passed twice would collapse into a single entry.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed X through every registered block in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X

# Build the same three-layer network with MySequential and run a forward pass on X.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.0706, -0.1834, -0.0820,  0.1715,  0.1428,  0.0126,  0.3185,  0.2802,
          0.0565,  0.0190],
        [-0.0461, -0.0136, -0.1735,  0.2413,  0.0412,  0.0895,  0.3582,  0.1149,
          0.0977, -0.1954]], grad_fn=<AddmmBackward>)

In [18]:
# 正向传播中执行

class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a constant random matrix, a reused
    linear layer and Python control flow, returning a scalar."""

    def __init__(self):
        super().__init__()
        # Constant random weights that are never trained. Registering them as
        # a non-persistent buffer keeps them out of parameters() and
        # state_dict(), while letting .to()/.double()/.cuda() move or cast
        # them together with the rest of the module.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar computed from the batch X (last dimension 20)."""
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # The same layer is applied a second time, so both uses share parameters.
        X = self.linear(X)
        # Arbitrary control flow: halve X until its L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2

        return X.sum()

# Instantiate the model and run a forward pass on X; the result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(0.1611, grad_fn=<SumBackward0>)

In [16]:
# 通过继承nn.Module  可以更灵活的前向计算 

In [20]:
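# nn.Module instances can be nested arbitrarily.
# Submodules may be created inside __init__ or come from classes defined elsewhere; both work.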

# 参数管理

In [30]:
# 单隐藏层的多层感知机
import torch
from torch import nn

# MLP with a single hidden layer: 4 inputs -> 8 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 8),
    nn.ReLU(),
    nn.Linear(8, 1),
)
X = torch.rand(2, 4)  # batch of 2 samples with 4 features each
net(X)

tensor([[0.0516],
        [0.0095]], grad_fn=<AddmmBackward>)

In [23]:
# Inspect the last layer (index 2): its state_dict holds the 'weight' and 'bias' tensors.
net[2].state_dict()

OrderedDict([('weight',
              tensor([[-0.0264,  0.3109, -0.0232,  0.3247,  0.2948, -0.0633, -0.0921,  0.0837]])),
             ('bias', tensor([0.1299]))])

In [24]:
# The bias is a Parameter object: the tensor value plus its requires_grad flag.
net[2].bias

Parameter containing:
tensor([0.1299], requires_grad=True)

In [25]:
# .data gives the underlying tensor value of the parameter.
net[2].bias.data

tensor([0.1299])

In [29]:
# No backward pass has been run yet, so the gradient is still None.
# Compare with `is`: None is a singleton and `==` can be overloaded.
net[2].weight.grad is None

True

In [39]:
# Access all parameters at once: first only layer 0, then the whole network.
# The leading * unpacks the list so print receives each (name, shape) pair
# as a separate argument.
for module in (net[0], net):
    print(*[(name, param.shape) for name, param in module.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [40]:
#  net[1]是relu 是没有参数的

In [42]:
# A parameter can also be looked up by its dotted name in the network-level state_dict.
print(net.state_dict()["2.bias"].data)

tensor([0.0651])


## 嵌套块收集参数

In [47]:
def block1():
    """Return a new 4 -> 8 -> 4 block with a ReLU after each linear layer."""
    layers = (nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
    return nn.Sequential(*layers)

def block2():
    """Stack four separately created block1() modules, named block0..block3."""
    stacked = nn.Sequential()
    for idx in range(4):
        child_name = f'block{idx}'
        stacked.add_module(child_name, block1())
    return stacked

In [48]:
# Nest the block2() stack inside another Sequential, followed by a 4 -> 1 linear layer.
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)

tensor([[-0.5428],
        [-0.5428]], grad_fn=<AddmmBackward>)

In [49]:
# Display the nested module hierarchy.
rgnet

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)

## linear的初始化

In [51]:
def init_normal(m):
    """Initialize an ``nn.Linear`` layer in place; ignore any other module.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.01 (not variance), and the bias, when the layer has one, is
    set to zero. Meant to be passed to ``Module.apply``.

    Args:
        m: the module being visited.
    """
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer created with bias=False has m.bias set to None; skip it
        # instead of crashing inside nn.init.zeros_.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
net.apply(init_normal) # similar to apply in pandas: the function is called on every layer

# Show the first weight row and the first bias entry of layer 0 after initialization.
net[0].weight.data[0], net[0].bias.data[0]

(tensor([-0.0009, -0.0100, -0.0135, -0.0179]), tensor(0.))

In [52]:
# nn.ReLU (a module class) and F.relu are different objects, so this is False.
nn.ReLU == F.relu

False

In [53]:
# Show the fully qualified class behind nn.ReLU.
nn.ReLU

torch.nn.modules.activation.ReLU

In [57]:
# Parameter tying: the same layer object is placed at two positions.
shared = nn.Linear(8, 8)  # input and output sizes only need to match

net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 2),
)

net(X)

tensor([[ 0.2005, -0.3776],
        [ 0.2185, -0.3605]], grad_fn=<AddmmBackward>)

In [58]:
# Both positions hold the very same layer object; `is` states the identity
# check explicitly instead of relying on the default `==` fallback.
net[2] is net[4]

True

In [59]:
# Changing the parameters of net[2] or net[4] also changes the other one: both refer to the same layer object.


# 自定义层

In [60]:
# 任意嵌套