# 模型构造

## 层和块

In [2]:
import torch as tor
from torch import nn as tor_nn
from torch.nn import functional as tor_nn_F # 定义了一些函数

In [10]:
# Baseline model built with the stock container: 20 inputs -> 256 hidden
# units (ReLU) -> 10 outputs.
net = tor_nn.Sequential(
    tor_nn.Linear(20, 256),
    tor_nn.ReLU(),
    tor_nn.Linear(256, 10),
)

# Random mini-batch of 2 samples with 20 features each; reused by later cells.
X = tor.rand(2, 20)
net(X)

tensor([[-0.0302, -0.1014,  0.0653, -0.1320, -0.0345,  0.0803, -0.1082,  0.0570,
          0.1802, -0.1400],
        [-0.0350, -0.0733,  0.0675, -0.1960,  0.0730,  0.0117, -0.0802,  0.1508,
          0.2768, -0.1608]], grad_fn=<AddmmBackward0>)

接下来是一个自定义的module，操作效果同上

In [8]:
class MLP(tor_nn.Module):
    """Two-layer perceptron: Linear(20, 256) -> ReLU -> Linear(256, 10).

    Hand-written equivalent of the ``Sequential`` model above, expressed as
    a custom ``Module`` subclass.
    """

    def __init__(self):
        # The parent constructor must run first so that the layers assigned
        # below are registered as submodules.
        super().__init__()
        self.hidden = tor_nn.Linear(20, 256)
        self.out = tor_nn.Linear(256, 10)

    def forward(self, X):
        """Map a ``(batch, 20)`` input to ``(batch, 10)`` outputs."""
        hidden_activation = tor_nn_F.relu(self.hidden(X))
        return self.out(hidden_activation)

In [9]:
# Instantiate the custom MLP and run it on X (defined in an earlier cell);
# calling the module dispatches to MLP.forward.
net = MLP()
net(X)

tensor([[ 0.2731, -0.1674, -0.0592, -0.2217,  0.2423,  0.0400, -0.0419, -0.0402,
          0.2909,  0.0204],
        [ 0.2150, -0.0593, -0.2110, -0.2889,  0.0415,  0.1299,  0.0559, -0.0840,
          0.1427,  0.1782]], grad_fn=<AddmmBackward0>)

注意，这里的权重是随机初始化的，所以输出的数值会与上面的结果不同；但有一点十分重要：输出的形状并未改变（仍然是 2×10）。

### 自己的nn.Sequential

In [11]:
class MySequential(tor_nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: Modules to chain, applied in the order given.

    Each module is registered under its positional index as a string
    ("0", "1", ...), matching the naming scheme of ``nn.Sequential``.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Submodule names must be strings. Keying the registry by the
            # module object itself breaks state_dict()/repr()/named_modules()
            # (they concatenate the key into dotted names) and collapses a
            # module passed twice into a single entry.
            self.add_module(str(idx), block)

    def forward(self, X):
        """Feed ``X`` through every registered module in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X

In [12]:
# Same 20 -> 256 -> 10 architecture as before, now assembled with the
# hand-written container instead of nn.Sequential.
net = MySequential(
    tor_nn.Linear(20, 256),
    tor_nn.ReLU(),
    tor_nn.Linear(256, 10),
)
net(X)

tensor([[ 0.0464,  0.3002, -0.1377, -0.1848, -0.1139, -0.2025, -0.1029,  0.2463,
          0.2105, -0.0694],
        [ 0.1102,  0.1862, -0.2201, -0.1519, -0.0805, -0.1791, -0.1531,  0.3277,
          0.1983, -0.1483]], grad_fn=<AddmmBackward0>)

从中我们可以发现一个有趣的事实：我们会高频地使用forward。更令人惊奇的是，我们并没有显式调用forward——执行net(X)时，nn.Module的`__call__`会自动调用forward，并把我们传入的数据X交给它处理！

由此，我们可以获得一个思路：对传入数据的操作其实是与forward深度绑定的，因此我们可以直接把任意的数据操作（包括Python控制流）写进forward中，形成我们自定义的计算！看看下面这个例子：

In [13]:
class FixedHiddenMLP(tor_nn.Module):
    """MLP demonstrating arbitrary computation inside ``forward``.

    Combines a trainable ``Linear(20, 20)`` layer (applied twice, so its
    parameters are shared), a constant random weight matrix that is never
    trained, and plain Python control flow.
    """

    def __init__(self) -> None:
        super().__init__()
        # Constant (non-trainable) random weights. Registered as a buffer so
        # that .to(device)/.double()/.half() convert it together with the
        # rest of the module; a plain tensor attribute would be left behind
        # and cause a device/dtype mismatch in forward().
        # persistent=False keeps the state_dict keys limited to `linear.*`.
        self.register_buffer("rand_weight", tor.rand((20, 20)), persistent=False)
        self.linear = tor_nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from a ``(batch, 20)`` input."""
        X = self.linear(X)
        # Fixed random projection plus a constant offset, then ReLU.
        X = tor_nn_F.relu(tor.mm(X, self.rand_weight) + 1)
        # Reuse the same linear layer: both applications share parameters.
        X = self.linear(X)
        # Data-dependent control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

我们可以看到我们通过修改forward获得了多样的体验！！！

In [18]:
# Build the model with custom forward logic and evaluate it on X (defined in
# an earlier cell); the result is a scalar tensor because forward() ends
# with X.sum().
net = FixedHiddenMLP()
net(X)

tensor(0.4095, grad_fn=<SumBackward0>)

### 套娃玩法

In [19]:
class NestMLP(tor_nn.Module):
    """Block that nests an ``nn.Sequential`` inside a custom module.

    Pipeline: Linear(20, 64) -> ReLU -> Linear(64, 32) -> ReLU, followed by
    a final Linear(32, 16) projection.
    """

    def __init__(self) -> None:
        super().__init__()
        self.net = tor_nn.Sequential(
            tor_nn.Linear(20, 64),
            tor_nn.ReLU(),
            tor_nn.Linear(64, 32),
            tor_nn.ReLU(),
        )
        self.linear = tor_nn.Linear(32, 16)

    def forward(self, X):
        """Map a ``(batch, 20)`` input to ``(batch, 16)`` outputs."""
        features = self.net(X)
        return self.linear(features)

In [21]:
# Mix-and-match composition: a nested custom block (20 -> 16), a plain linear
# layer (16 -> 20), and the custom-forward block, all chained in a Sequential.
chimera = tor_nn.Sequential(NestMLP(), tor_nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.3665, grad_fn=<SumBackward0>)