In [None]:
import torch
from torch import nn
from torch.nn import functional as F

# Reference model built from stock layers: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

# Random batch of 2 samples with 20 features each; later cells reuse this `X`.
X = torch.rand(2, 20)
net(X) 

tensor([[ 0.1517, -0.0553,  0.1290, -0.0061, -0.1314, -0.0055, -0.0852, -0.0596,
         -0.0033, -0.3120],
        [ 0.1791, -0.1008,  0.0830,  0.0961, -0.1157,  0.0339,  0.1410, -0.0425,
         -0.1499, -0.3830]], grad_fn=<AddmmBackward0>)

### 5.1.1 自定义块

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10).

    In addition to its two linear layers, every instance keeps ``connections``,
    a list of the other blocks it has been linked to through :meth:`connect`.
    """

    def __init__(self):
        # Run nn.Module's constructor first so the layers assigned below get registered.
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)
        # Plain Python list on purpose: linked blocks are bookkeeping only and
        # are not registered as sub-modules of this block.
        self.connections = []

    def forward(self, X):
        """Run the forward pass.

        Args:
            X: Input tensor whose last dimension is 20, e.g. ``(batch_size, 20)``.

        Returns:
            Tensor whose last dimension is 10, e.g. ``(batch_size, 10)``.
        """
        # hidden linear layer -> ReLU -> output linear layer
        return self.out(F.relu(self.hidden(X)))

    def connect(self, other_block):
        """Record an undirected link between this block and ``other_block``.

        The call is idempotent: each side lists the other at most once, even
        when only one direction of the link was present beforehand.  Linking a
        block to itself stores a single self-reference instead of two.

        Args:
            other_block: The block to link with.  A ``connections`` list is
                created on either side if it is missing.
        """
        for source, target in ((self, other_block), (other_block, self)):
            links = getattr(source, 'connections', None)
            if links is None:
                links = source.connections = []
            if target not in links:
                links.append(target)


In [4]:
def generate_instances(base_block, num_instances):
    """Create several fresh blocks of the same class as ``base_block``.

    Args:
        base_block: Template block.  Only its class is used; the class must be
            constructible without arguments (as ``MLP`` is).
        num_instances: Number of new blocks to create.

    Returns:
        A list with ``num_instances`` newly constructed, independent instances
        of ``type(base_block)``.
    """
    # Derive the class from the template instead of hard-coding one block type.
    block_cls = type(base_block)
    return [block_cls() for _ in range(num_instances)]

def build_network(base_block, num_instances):
    """Create ``num_instances`` blocks and link every block to every other one.

    Args:
        base_block: Template block forwarded to ``generate_instances``.
        num_instances: How many blocks the network should contain.

    Returns:
        A new list holding the generated blocks, in creation order.
    """
    blocks = generate_instances(base_block, num_instances)

    # Visit every ordered pair of different blocks and link them.
    for current in blocks:
        for peer in blocks:
            if current != peer:
                current.connect(peer)

    return list(blocks)

# Example usage: build five mutually linked MLP blocks and print each block with its links.
base_block = MLP()
num_instances = 5
network = build_network(base_block, num_instances)

for block in network:
    print(block)
    print(f"{block} is connected to {list(block.connections)}")

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)
MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
) is connected to [MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
), MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
), MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
), MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)]
MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)
MLP(
  (hidden): Linear(in_features=20, out_features=25

In [5]:
# Instantiate the custom MLP block and run it on the batch `X` created in the first cell.
net = MLP()
net(X)

tensor([[-0.2365, -0.0466, -0.2075,  0.0793,  0.0025, -0.0517,  0.2283, -0.1871,
         -0.0762,  0.1137],
        [-0.1012, -0.0203, -0.2308,  0.0191, -0.0831, -0.0376,  0.1771, -0.1477,
         -0.1186,  0.0908]], grad_fn=<AddmmBackward0>)

### 5.1.2 顺序块

In [6]:
class MySequential(nn.Module):
    """Minimal sequential container: applies its child modules one after another."""

    def __init__(self, *args):
        """Store the given modules in call order.

        Args:
            *args: Any number of modules; they are applied in the order given.
        """
        super().__init__()
        # Each module is stored in `_modules` under its position ("0", "1", ...) as the key.
        self._modules.update(
            (str(position), layer) for position, layer in enumerate(args))

    def forward(self, X):
        """Feed ``X`` through every stored module in insertion order.

        Args:
            X: Input data.

        Returns:
            The output of the last module (``X`` itself when there are none).
        """
        output = X
        # The result of each module becomes the input of the next one.
        for layer in self._modules.values():
            output = layer(output)
        return output
            

In [7]:
# Same 20 -> 256 -> 10 architecture as the first cell, assembled with MySequential.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.2666, -0.1289, -0.0157,  0.0398,  0.1279,  0.0733, -0.0466, -0.0374,
          0.1114, -0.2472],
        [-0.2652, -0.0487,  0.1382,  0.0522,  0.1025,  0.0683,  0.0764, -0.0402,
          0.1028, -0.2212]], grad_fn=<AddmmBackward0>)

### 5.1.3 在前向传播函数中执行代码

In [8]:
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with constant weights and Python control flow.

    The forward pass applies one shared linear layer twice, multiplies by a
    fixed random matrix in between, rescales the result inside a ``while``
    loop and finally reduces it to a scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights: no gradient is computed for them, so they
        # stay unchanged during training.  They are registered as a buffer so
        # that .to() / .cuda() / .double() move or cast them together with the
        # rest of the module (a plain tensor attribute would be left behind and
        # torch.mm would fail on a device/dtype mismatch).  persistent=False
        # keeps the state_dict keys unchanged (only the linear layer is saved).
        self.register_buffer('rand_weight', torch.rand((20, 20)),
                             persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Compute the scalar output for a batch.

        Args:
            X: 2-D input tensor of shape ``(batch_size, 20)``.

        Returns:
            A 0-dim tensor: the sum of the rescaled activations.
        """
        X = self.linear(X)
        # Use the constant weights together with relu and mm.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same fully connected layer: both applications share parameters.
        X = self.linear(X)
        # Control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [9]:
# FixedHiddenMLP.forward ends with X.sum(), so the result is a single scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(-0.0615, grad_fn=<SumBackward0>)

In [10]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` feature extractor before a final linear layer."""

    def __init__(self):
        super().__init__()
        # Inner stack: 20 -> 64 -> 32, with a ReLU after each linear layer.
        feature_layers = [
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*feature_layers)
        # Output projection: 32 -> 16.
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Map an input with 20 features to an output with 16 features."""
        features = self.net(X)
        return self.linear(features)

In [11]:
# Blocks compose freely: NestMLP (20 -> 16), a plain linear layer (16 -> 20)
# and FixedHiddenMLP (20 -> scalar) chained in one nn.Sequential.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(-0.0537, grad_fn=<SumBackward0>)

In [12]:
# Builds another FixedHiddenMLP; its weights are drawn anew, so the scalar
# differs from the value printed by the earlier identical cell.
net = FixedHiddenMLP()
net(X)

tensor(-0.1018, grad_fn=<SumBackward0>)

In [13]:
# NOTE: identical redefinition of the NestMLP class from an earlier cell;
# running this cell simply rebinds the name.
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` feature extractor before a final linear layer."""

    def __init__(self):
        super().__init__()
        # Inner stack: 20 -> 64 -> 32, with a ReLU after each linear layer.
        inner_layers = (
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        )
        self.net = nn.Sequential(*inner_layers)
        # Output projection: 32 -> 16.
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Map an input with 20 features to an output with 16 features."""
        hidden = self.net(X)
        return self.linear(hidden)

# Chain NestMLP (20 -> 16), a linear layer (16 -> 20) and FixedHiddenMLP (20 -> scalar).
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.2169, grad_fn=<SumBackward0>)

In [14]:
# Practice Problem 2
#实现一个块，它以两个块为参数，例如net1和net2，并返回前向传播中两个网络的串联输出。这也被称为平行块。
import torch
from torch import nn
from torch.nn import functional as F

class Parallel(nn.Module):
    """Parallel block: feeds the same input to two networks and concatenates their outputs."""

    def __init__(self, net1, net2):
        """Keep both branches as sub-modules.

        Args:
            net1: First branch.
            net2: Second branch.
        """
        super().__init__()
        self.net1 = net1
        self.net2 = net2

    def forward(self, X):
        """Return the outputs of ``net1`` and ``net2`` joined along dimension 1."""
        branch_outputs = (self.net1(X), self.net2(X))
        return torch.cat(branch_outputs, 1)
# Practice Problem 3
# 假设我们想要连接同一网络的多个实例。实现一个函数，该函数生成同一个块的多个实例，并在此基础上构建更大的网络。
def block1(in_channels=1, out_channels=64):
    """Build one convolutional stage: two 3x3 conv + ReLU pairs followed by 2x2 max pooling.

    Args:
        in_channels: Number of channels of the input feature map (default 1).
        out_channels: Number of channels produced by both convolutions (default 64).

    Returns:
        An ``nn.Sequential`` that keeps the spatial size through the
        convolutions (padding=1) and halves it in the pooling layer.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))


def block2(num_blocks=4):
    """Stack several ``block1`` stages into one larger network.

    Args:
        num_blocks: Number of stages to chain (default 4).

    Returns:
        An ``nn.Sequential`` whose children are named ``'block 0'``,
        ``'block 1'``, ...  The first stage takes a 1-channel input; every
        later stage takes the 64 channels emitted by the stage before it, so
        the stacked network can actually be run end to end.
    """
    out_channels = 64
    in_channels = 1
    net = nn.Sequential()
    for i in range(num_blocks):
        net.add_module(f'block {i}', block1(in_channels, out_channels))
        # The next stage consumes what this stage produces.
        in_channels = out_channels
    return net

In [15]:
class Block:
    """Named node that keeps a list of the other blocks it is linked to."""

    def __init__(self, name):
        """Create an unconnected block.

        Args:
            name: Label used by ``__repr__`` and when printing the network.
        """
        self.name = name
        self.connections = []

    def connect(self, other_block):
        """Record an undirected link between this block and ``other_block``.

        The call is idempotent: each side lists the other at most once, even
        when only one direction of the link was present beforehand.  Linking a
        block to itself stores a single self-reference instead of two.

        Args:
            other_block: The block to link with.
        """
        if other_block not in self.connections:
            self.connections.append(other_block)
        # Checked separately so the reverse link is never duplicated.
        if self not in other_block.connections:
            other_block.connections.append(self)

    def __repr__(self):
        return f"Block({self.name})"

def generate_instances(base_block, num_instances):
    """Create ``num_instances`` new ``Block`` objects named after ``base_block``.

    Args:
        base_block: Template whose ``name`` is used as the prefix of the new names.
        num_instances: Number of blocks to create.

    Returns:
        A list of blocks named ``"<base name>_0"``, ``"<base name>_1"``, ...
    """
    instances = []
    for index in range(num_instances):
        instances.append(Block(f"{base_block.name}_{index}"))
    return instances

def build_network(base_block, num_instances):
    """Create ``num_instances`` blocks and link every block to every other one.

    Args:
        base_block: Template block forwarded to ``generate_instances``.
        num_instances: How many blocks the network should contain.

    Returns:
        A new list holding the generated blocks, in creation order.
    """
    blocks = generate_instances(base_block, num_instances)

    # Visit every ordered pair of different blocks and link them.
    for current in blocks:
        for peer in blocks:
            if current != peer:
                current.connect(peer)

    return list(blocks)

# Example usage: build five mutually linked blocks and list each block's neighbours by name.
base_block = Block("Base")
num_instances = 5
network = build_network(base_block, num_instances)

for block in network:
    print(f"{block.name} is connected to {[peer.name for peer in block.connections]}")

Base_0 is connected to ['Base_1', 'Base_2', 'Base_3', 'Base_4']
Base_1 is connected to ['Base_0', 'Base_2', 'Base_3', 'Base_4']
Base_2 is connected to ['Base_0', 'Base_1', 'Base_3', 'Base_4']
Base_3 is connected to ['Base_0', 'Base_1', 'Base_2', 'Base_4']
Base_4 is connected to ['Base_0', 'Base_1', 'Base_2', 'Base_3']
