In [3]:
# 层和块
# 回顾一下多层感知机
import torch
from torch import nn
from torch.nn import functional as F

# Baseline multilayer perceptron assembled with nn.Sequential:
# Linear(20 -> 256), ReLU, Linear(256 -> 10).
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

# X is reused as the input batch by the later cells in this notebook.
X = torch.rand(2, 20)  # uniform distribution over [0, 1)
net(X)

tensor([[-0.0862,  0.0145,  0.0616, -0.0176, -0.0291,  0.2594,  0.0655,  0.0698,
         -0.1469, -0.3769],
        [-0.0418,  0.0581,  0.0731,  0.1903, -0.0646,  0.0704, -0.0212,  0.1093,
          0.0746, -0.3484]], grad_fn=<AddmmBackward0>)

In [4]:
# 自定义块
class MLP(nn.Module):
    """Multilayer perceptron: Linear(20 -> 256), ReLU, Linear(256 -> 10)."""

    def __init__(self):
        # Initialise nn.Module first so the layers assigned below are
        # registered as submodules.
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Apply the hidden layer, a ReLU, and then the output layer to X."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)

In [5]:
# Instantiate the MLP's layers, then call those layers every time the
# forward function is invoked.
net = MLP()
net(X)

tensor([[ 0.0315,  0.1111,  0.0529,  0.0135, -0.0158,  0.0265, -0.1347,  0.1681,
         -0.1510, -0.1245],
        [ 0.1259,  0.0473,  0.0422,  0.0468, -0.1243,  0.0686, -0.1296,  0.0927,
         -0.1452, -0.0902]], grad_fn=<AddmmBackward0>)

In [8]:
""" 两个模型结构一致, 值不一样的原因是：初始化权重随机 """
# 顺序块
class MySequential(nn.Module):
    """Minimal sequential container: applies the given blocks in order.

    Args:
        *args: The blocks to chain, in the order they should be applied.
            Each block is called with the output of the previous one.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of `_modules` must be strings: nn.Module builds parameter
            # and submodule names by concatenating them with a string prefix.
            # Using the position as the key also keeps a block that is passed
            # more than once as separate entries, so it is applied once per
            # occurrence instead of being collapsed into a single entry.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed X through every registered block in insertion order."""
        # Iterate `_modules.values()` directly (not `children()`), so a block
        # registered at several positions is applied at each of them.
        for block in self._modules.values():
            X = block(X)
        return X

# Chain the same three layer types as the nn.Sequential baseline, this time
# through the hand-written MySequential container.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 0.0498, -0.1223,  0.0857, -0.0032, -0.0143,  0.5011,  0.2174, -0.1478,
         -0.0238,  0.1054],
        [-0.0847, -0.0383, -0.0025,  0.0608,  0.0788,  0.3700,  0.1331, -0.0901,
          0.2251,  0.1036]], grad_fn=<AddmmBackward0>)

In [9]:
# 在正向传播函数中执行代码
# 通过对nn.Module的继承, 可以更灵活地进行前向计算
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a fixed random weight and Python control flow.

    The same ``linear`` layer is applied twice (its parameters are shared), with
    a constant, non-trainable random matrix multiplied in between.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that take no part in training. Registering
        # them as a buffer (instead of a plain tensor attribute) makes them
        # follow the module through .to()/.double()/.cuda(), so the matmul in
        # forward() keeps matching the dtype and device of the linear layer.
        # persistent=False keeps the buffer out of state_dict(), so the set of
        # checkpoint keys is the same as with a plain attribute.
        self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar: the sum of the rescaled network output.

        Args:
            X: Input batch; its last dimension must be 20 to match
                ``self.linear``.
        """
        X = self.linear(X)
        # Fixed (non-learned) projection, shifted by 1 and passed through a ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same linear layer: both applications share parameters.
        X = self.linear(X)
        # Halve the activations until their L1 norm is at most 1.
        # NOTE(review): an infinite value in X would never satisfy the exit
        # condition, since halving infinity leaves it infinite.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

# FixedHiddenMLP.forward ends with X.sum(), so this call yields a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(0.1225, grad_fn=<SumBackward0>)

In [10]:
# 混合搭配各种组合块的方法
class NestMLP(nn.Module):
    """Block that nests an nn.Sequential stack and follows it with a Linear layer.

    Layer sizes: 20 -> 64 -> 32 inside the nested stack (ReLU after each
    Linear), then 32 -> 16 in the final Linear layer.
    """

    def __init__(self):
        super().__init__()
        stack = [nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU()]
        self.net = nn.Sequential(*stack)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run X through the nested stack, then through the final Linear layer."""
        features = self.net(X)
        return self.linear(features)

# Mix custom blocks with built-in layers: NestMLP ends in Linear(32, 16), the
# Linear(16, 20) adapts that to the 20 input features of FixedHiddenMLP's
# Linear(20, 20), and FixedHiddenMLP reduces the result to a scalar.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.1357, grad_fn=<SumBackward0>)