In [15]:
import torch
from torch import nn
from torch.nn import functional as F

# Baseline model built from the stock container: Linear(20 -> 256), ReLU, Linear(256 -> 10).
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

# Random batch of 2 samples with 20 features each; reused by the cells below.
X = torch.rand(2, 20)
# Bare last expression so the notebook displays the (2, 10) forward-pass output.
net(X)

tensor([[-0.3372, -0.2499, -0.0458,  0.1428,  0.2644, -0.0560,  0.1011, -0.1804,
         -0.0920, -0.1475],
        [-0.1999, -0.0780, -0.1392,  0.2085,  0.3938, -0.1946,  0.3293, -0.0319,
         -0.1572, -0.0312]], grad_fn=<AddmmBackward0>)

### 5.1.1 自定义块

In [16]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        # Run nn.Module's constructor first so the layers assigned below are
        # tracked as submodules of this block.
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Compute the forward pass.

        Args:
            X: Input batch of shape (batch_size, 20).

        Returns:
            Tensor of shape (batch_size, 10).
        """
        # Hidden linear layer -> ReLU -> output linear layer, as one expression.
        return self.out(F.relu(self.hidden(X)))


In [17]:
# Instantiate the custom MLP block and apply it to the batch X created in an earlier cell.
net = MLP()
net(X)

tensor([[-0.0772, -0.0264, -0.0347,  0.2238, -0.0604, -0.1396,  0.1877,  0.1502,
         -0.0948,  0.0564],
        [-0.2018, -0.0744, -0.1151,  0.3749, -0.0877, -0.1996,  0.0286,  0.2178,
         -0.1861, -0.0170]], grad_fn=<AddmmBackward0>)

### 5.1.2 顺序块

In [18]:
class MySequential(nn.Module):
    """Minimal re-implementation of a sequential container."""

    def __init__(self, *args):
        """Store the given blocks in the order they were passed.

        Args:
            *args: Any number of blocks; each one is stored under the string
                form of its position ("0", "1", ...).
        """
        super().__init__()
        # Write straight into ``_modules``, the dict nn.Module uses to keep
        # track of its child modules, preserving the call order.
        self._modules.update(
            (str(position), layer) for position, layer in enumerate(args))

    def forward(self, X):
        """Feed the input through every stored block in order.

        Args:
            X: Input data.

        Returns:
            The output of the last block (X itself when no block is stored).
        """
        out = X
        # Each block consumes the previous block's output.
        for layer in self._modules.values():
            out = layer(out)
        return out
            

In [19]:
# Rebuild the same Linear -> ReLU -> Linear stack with the hand-written container
# and apply it to the batch X created in an earlier cell.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[ 1.0225e-01, -1.0395e-02,  5.4524e-02,  1.3277e-04, -3.1180e-03,
          5.6571e-02, -2.1530e-01,  3.4120e-01,  1.3096e-01,  2.8348e-02],
        [ 4.1177e-02,  2.0106e-01, -8.6392e-03, -3.0199e-02, -1.3774e-01,
          7.5322e-02, -9.5635e-02,  3.7463e-01,  8.2559e-02, -6.5354e-03]],
       grad_fn=<AddmmBackward0>)

### 5.1.3 在前向传播函数中执行代码

In [20]:
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a constant weight, a reused layer and control flow.

    The block expects inputs of shape (batch_size, 20) and returns a scalar
    tensor (the sum of the rescaled activations).
    """

    def __init__(self):
        super().__init__()
        # Constant random weight that is never trained. It is registered as a
        # buffer rather than stored as a plain tensor attribute, so it stays
        # out of parameters() but still follows the module through
        # .to()/.double()/.cuda() and is saved in state_dict().
        self.register_buffer('rand_weight', torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Run the forward pass.

        Args:
            X: Input batch of shape (batch_size, 20).

        Returns:
            Zero-dimensional tensor holding the sum of the final activations.
        """
        X = self.linear(X)
        # Use the constant weight together with mm and relu.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same fully connected layer: both applications share parameters.
        X = self.linear(X)
        # Control flow: halve X until the sum of absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [21]:
# Instantiate FixedHiddenMLP and apply it to the batch X created in an earlier cell;
# the result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(-0.0585, grad_fn=<SumBackward0>)

In [22]:
class NestMLP(nn.Module):
    """Block that nests an nn.Sequential stack and adds a final linear layer.

    Maps inputs of shape (batch_size, 20) to outputs of shape (batch_size, 16).
    """

    def __init__(self):
        super().__init__()
        # Inner stack: 20 -> 64 -> 32 with a ReLU after each linear layer.
        inner_layers = [
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*inner_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Apply the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)

In [23]:
# Mix and match blocks: NestMLP (20 -> 16), Linear (16 -> 20), then FixedHiddenMLP,
# which reduces the batch to a scalar. X is the batch created in an earlier cell.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.0801, grad_fn=<SumBackward0>)

### 5.1.3 在前向传播函数中执行代码

In [24]:
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a constant weight, a reused layer and control flow.

    The block expects inputs of shape (batch_size, 20) and returns a scalar
    tensor (the sum of the rescaled activations).
    """

    def __init__(self):
        super().__init__()
        # Constant random weight that is never trained. It is registered as a
        # buffer rather than stored as a plain tensor attribute, so it stays
        # out of parameters() but still follows the module through
        # .to()/.double()/.cuda() and is saved in state_dict().
        self.register_buffer('rand_weight', torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Run the forward pass.

        Args:
            X: Input batch of shape (batch_size, 20).

        Returns:
            Zero-dimensional tensor holding the sum of the final activations.
        """
        X = self.linear(X)
        # Multiply by the constant weight, shift by 1 and apply ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Second pass through the same linear layer (shared parameters).
        X = self.linear(X)

        # Control flow: halve X until the sum of absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2

        return X.sum()

In [25]:
# Fresh FixedHiddenMLP instance (new random weights) applied to the batch X
# created in an earlier cell; the result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(0.5007, grad_fn=<SumBackward0>)

In [26]:
class NestMLP(nn.Module):
    """Nested block: an inner nn.Sequential stack followed by a linear head.

    Maps inputs of shape (batch_size, 20) to outputs of shape (batch_size, 16).
    """

    def __init__(self):
        super().__init__()
        # Inner stack 20 -> 64 -> 32, each linear layer followed by a ReLU.
        first_fc = nn.Linear(20, 64)
        second_fc = nn.Linear(64, 32)
        self.net = nn.Sequential(first_fc, nn.ReLU(), second_fc, nn.ReLU())
        # Linear head 32 -> 16.
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Apply the inner stack and then the linear head."""
        hidden = self.net(X)
        output = self.linear(hidden)
        return output

# Compose NestMLP (20 -> 16), Linear (16 -> 20) and FixedHiddenMLP into one network
# and apply it to the batch X created in an earlier cell; the output is a scalar tensor.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(-0.0226, grad_fn=<SumBackward0>)

In [27]:
# Practice Problem 2
#实现一个块，它以两个块为参数，例如net1和net2，并返回前向传播中两个网络的串联输出。这也被称为平行块。
import torch
from torch import nn
from torch.nn import functional as F

class Parallel(nn.Module):
    """Parallel block: feeds the same input to two networks and joins the outputs."""

    def __init__(self, net1, net2):
        """Store the two branches.

        Args:
            net1: First branch.
            net2: Second branch.
        """
        super().__init__()
        self.net1 = net1
        self.net2 = net2

    def forward(self, X):
        """Return the outputs of both branches concatenated along dimension 1."""
        first_out = self.net1(X)
        second_out = self.net2(X)
        return torch.cat([first_out, second_out], dim=1)
# Practice Problem 3
# 假设我们想要连接同一网络的多个实例。实现一个函数，该函数生成同一个块的多个实例，并在此基础上构建更大的网络。
def block1(in_channels=1, out_channels=64):
    """Build one convolutional block: two 3x3 conv + ReLU stages and a 2x2 max-pool.

    Args:
        in_channels: Number of channels of the block's input (default 1).
        out_channels: Number of channels produced by both convolutions (default 64).

    Returns:
        An nn.Sequential that keeps the spatial size through the convolutions
        (padding=1) and halves it in the final pooling layer.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))


def block2(num_blocks=4, in_channels=1, out_channels=64):
    """Stack several instances of block1 into one larger network.

    Args:
        num_blocks: How many block1 instances to chain (default 4).
        in_channels: Channels of the network input, seen by the first block only.
        out_channels: Channels produced by every block.

    Returns:
        An nn.Sequential whose children are named 'block 0', 'block 1', ...
    """
    net = nn.Sequential()
    for i in range(num_blocks):
        # Only the first block receives the raw input. Every later block is fed
        # the previous block's out_channels feature maps, so its first
        # convolution must accept that many channels or the forward pass fails.
        block_in = in_channels if i == 0 else out_channels
        net.add_module(f'block {i}', block1(block_in, out_channels))
    return net