## 模型构造

In [1]:
import torch
from torch import nn
import torch.nn.functional as F

class FixedHiddenMLP(nn.Module):
    """MLP that combines one trainable linear layer with a fixed random matrix.

    The single ``nn.Linear(20, 20)`` layer is applied twice in ``forward``, so
    both applications share the same weight and bias. Between them the
    activations are multiplied by a constant random ``(20, 20)`` matrix that
    is never updated by an optimizer.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights. Registering them as a buffer (instead of a
        # plain tensor attribute) makes them follow the module through
        # ``.to()``, ``.cuda()``, ``.double()`` etc.; ``persistent=False`` keeps
        # them out of ``state_dict`` so the saved keys are unchanged.
        self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)  # the only trainable layer

    def forward(self, X):
        """Run the forward pass and reduce the result to a scalar.

        Args:
            X: 2-D tensor whose last dimension is 20 (``torch.mm`` requires
                matrices and the linear layer expects 20 input features).

        Returns:
            A 0-dim tensor holding the sum of the final activations.
        """
        X = self.linear(X)  # first pass through the trainable layer
        # Fixed random projection, constant offset of 1, then ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        X = self.linear(X)  # second pass reuses the same layer (shared weights)
        # Data-dependent control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Build the module and print its scalar output for a random (20, 20) input.
net = FixedHiddenMLP()
print(net(torch.randn((20, 20))))

tensor(-0.0876, grad_fn=<SumBackward0>)


In [2]:
import torch
from torch import nn
import torch.nn.functional as F

# Nested MLP module
class NestMLP(nn.Module):
    """MLP that wraps an ``nn.Sequential`` stack followed by a linear head.

    Layer sizes: 20 -> 64 -> ReLU -> 32 -> ReLU (``self.net``), then
    32 -> 16 (``self.linear``).
    """

    def __init__(self):
        super().__init__()
        hidden_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*hidden_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Apply the nested stack, then the final linear layer."""
        hidden = self.net(X)
        return self.linear(hidden)

# FixedHiddenMLP is assumed to have been defined in an earlier cell.

# Compose different kinds of modules like building blocks.
chimera = nn.Sequential(
    NestMLP(),          # takes input of shape (batch, 20), emits 16 features
    nn.Linear(16, 20),  # maps the intermediate result from 16 -> 20 features
    FixedHiddenMLP(),   # custom module defined earlier; returns a scalar
)

# Usage example: a batch of 2 samples with 20 features each.
X = torch.rand(size=(2, 20))
output = chimera(X)
print(output)


tensor(-0.4424, grad_fn=<SumBackward0>)


## 参数管理

In [13]:


import torch
from torch import nn

# A simple feed-forward network: 4 -> 8 -> ReLU -> 1.
net = nn.Sequential(
    nn.Linear(4, 8),  # input layer: 4 features in, 8 out
    nn.ReLU(),        # activation (has no parameters)
    nn.Linear(8, 1),  # output layer: 8 features in, 1 out
)

# One batch of shape (2, 4): batch_size=2, 4 features per sample.
X = torch.rand(2, 4)

# Forward pass.
output = net(X)
print(output)

# Parameters (weight and bias) of the module at index 2, i.e. the output layer.
print(net[2].state_dict())

# A bias is an nn.Parameter; it exposes both the raw values and the gradient.
print(type(net[2].bias))  # <class 'torch.nn.parameter.Parameter'>
print(net[2].bias)
print(net[2].bias.data)
print(net[2].bias.grad)   # None here: backward() has not been called

# Parameters of the first layer only.
print(*((name, param.shape) for name, param in net[0].named_parameters()))

# Whole network: only indices 0 and 2 appear, since index 1 is a ReLU.
print(*((name, param.shape) for name, param in net.named_parameters()))

# Parameters can also be looked up directly by their state_dict key.
print(net.state_dict()['2.bias'].data)





tensor([[-0.1410],
        [ 0.0042]], grad_fn=<AddmmBackward0>)
OrderedDict({'weight': tensor([[ 0.2530,  0.1816, -0.3351,  0.2569, -0.2581, -0.0274,  0.2024,  0.3315]]), 'bias': tensor([-0.2355])})
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.2355], requires_grad=True)
tensor([-0.2355])
None
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
tensor([-0.2355])


In [15]:
# 模型嵌套
import torch
from torch import nn

# Basic building block: Linear(4, 8) -> ReLU -> Linear(8, 4) -> ReLU.
def block1():
    """Return a fresh ``nn.Sequential`` mapping 4 features to 4 features.

    Every call creates new, independently initialised layers.
    """
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

# Composite block: four block1 instances stacked in sequence.
def block2():
    """Return an ``nn.Sequential`` containing four separate ``block1`` modules.

    The children are registered under the names ``'block 0'`` to ``'block 3'``.
    """
    stacked = nn.Sequential()
    for idx in range(4):
        child_name = f'block {idx}'
        stacked.add_module(child_name, block1())
    return stacked

# Full model: the stacked blocks from block2 followed by Linear(4, 1).
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))

# Test input: a batch of 2 samples with 4 features each.
X = torch.rand(size=(2, 4))
output = rgnet(X)
print(output)

# Printing the model shows the nested module hierarchy.
print(rgnet)


tensor([[0.4715],
        [0.4715]], grad_fn=<AddmmBackward0>)
Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [16]:
# 参数绑定
import torch
from torch import nn

# One layer object that will be inserted into the network twice.
shared = nn.Linear(8, 8)

net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),  # net[0], net[1]
    shared, nn.ReLU(),           # net[2], net[3]
    shared, nn.ReLU(),           # net[4], net[5]
    nn.Linear(8, 1),             # net[6]
)

# Run one forward pass on a random batch.
X = torch.rand(size=(2, 4))
net(X)

# net[2] and net[4] both refer to `shared`, so their weights compare equal
# (prints an element-wise tensor of True values).
print(net[2].weight.data[0] == net[4].weight.data[0])

# Overwrite a single weight through net[2] ...
net[2].weight.data[0, 0] = 100

# ... and the change is visible through net[4] too: still all True, which
# shows the two positions hold the same parameters rather than copies.
print(net[2].weight.data[0] == net[4].weight.data[0])


tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


In [22]:
# 自定义层
import torch
import torch.nn.functional as F
from torch import nn

# Custom "centering" layer: output = input - mean.
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of all input elements."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` shifted by its overall mean."""
        overall_mean = X.mean()
        return X - overall_mean

# Instantiate the layer.
layer = CenteredLayer()

# Test input: centering [1, 2, 3, 4, 5] should give [-2, -1, 0, 1, 2].
output = layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32))
print(output)

# The custom layer can be used inside nn.Sequential like any built-in layer.
net = nn.Sequential(
    nn.Linear(5, 8),
    CenteredLayer(),
)

Y = net(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32))
# Print explicitly: this statement is not the last expression of the cell, so
# a bare `Y.mean()` would be evaluated and silently discarded. The value is
# expected to be close to 0 (up to floating-point error) after centering.
print(Y.mean())

# 自定义线性层
import torch
import torch.nn.functional as F
from torch import nn

class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU: ``relu(X @ weight + bias)``.

    Args:
        in_units: Number of input features.
        units: Number of output features.

    Attributes:
        weight: Learnable ``nn.Parameter`` of shape ``(in_units, units)``.
        bias: Learnable ``nn.Parameter`` of shape ``(units,)``.
    """

    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))  # learnable weight
        self.bias = nn.Parameter(torch.randn(units))              # learnable bias

    def forward(self, X):
        """Apply the affine map followed by ReLU.

        Args:
            X: Tensor whose last dimension equals ``in_units``.

        Returns:
            Tensor with last dimension ``units``, non-negative everywhere.
        """
        # Use the Parameters themselves rather than ``.data``: ``.data`` hides
        # the operation from autograd, so weight and bias would never receive
        # gradients and the layer could not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)  # ReLU activation

# Build a 5 -> 3 layer and display its weight Parameter (last expression of the cell).
dense = MyLinear(in_units=5, units=3)
dense.weight

tensor([-2., -1.,  0.,  1.,  2.])


Parameter containing:
tensor([[-1.8933, -0.4900,  0.8610],
        [ 1.5505, -0.9520,  0.9694],
        [ 0.0120,  0.1071, -0.8248],
        [-0.2009,  0.3434, -0.2102],
        [-0.6891,  1.3203, -0.7312]], requires_grad=True)