## 层与块
首先，我们回顾一下多层感知机：用 `nn.Sequential` 将两个全连接层和一个 ReLU 激活函数依次串联起来。

In [6]:
import torch
from torch import nn
from torch.nn import functional as F

In [7]:
# Multilayer perceptron assembled from built-in layers:
# 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# Random mini-batch of 2 samples with 20 features each.
X = torch.randn(2, 20)

# Forward pass; as the last expression of the cell its value is displayed.
net(X)

tensor([[ 0.3390, -0.0053,  0.2116, -0.3555,  0.3102,  0.0054, -0.0433,  0.0459,
          0.0014, -0.0559],
        [ 0.1975, -0.2844, -0.1418, -0.2668,  0.4706,  0.2997, -0.0676, -0.0683,
          0.4989, -0.1652]], grad_fn=<AddmmBackward0>)

## 自定义块

In [8]:
class MLP(nn.Module):
    """Multilayer perceptron written as a custom ``nn.Module``.

    Architecture: Linear(20 -> 256), ReLU, Linear(256 -> 10).
    """

    def __init__(self):
        """Create the hidden and output fully connected layers."""
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return ``out(relu(hidden(X)))`` for the input ``X``."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)

## 实例化多层感知机的层，然后每次调用正向传播函数时调用这些层

In [9]:
# Instantiate the custom MLP block (this rebinds the global name `net`) and
# run a forward pass on the batch X created in an earlier cell.
net = MLP()
net(X)

tensor([[-0.1415, -0.1629,  0.1308,  0.0994, -0.1630,  0.0100, -0.0199, -0.3710,
          0.2443,  0.0805],
        [ 0.0833, -0.0913,  0.2660, -0.1360,  0.1339,  0.1052, -0.0967, -0.5183,
         -0.1900, -0.2999]], grad_fn=<AddmmBackward0>)

## 顺序块

In [13]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Every positional argument is registered as a child module under its
    position ("0", "1", ...); ``forward`` passes the input through the
    children in that order.
    """

    def __init__(self, *args):
        """Register each block in ``args`` as a child, keyed by its index."""
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of ``_modules`` must be strings: ``parameters()``,
            # ``named_modules()``, ``state_dict()`` and ``repr()`` build dotted
            # names by string concatenation and raise TypeError otherwise.
            # Keying by position (rather than by the module object) also keeps
            # a module instance that is passed more than once at every position.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Apply the registered blocks to ``X`` one after another."""
        # ``_modules`` preserves insertion order, so blocks run in the order
        # they were passed to the constructor.
        for block in self._modules.values():
            X = block(X)
        return X
# Build the same Linear(20, 256) -> ReLU -> Linear(256, 10) stack with the
# hand-written MySequential container and run it on the batch X from an
# earlier cell. This rebinds the global name `net`.
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[-0.0117,  0.0682,  0.2295,  0.0626, -0.0473, -0.0626,  0.0010, -0.0131,
         -0.3843, -0.1101],
        [ 0.0746,  0.0359,  0.0613, -0.2553,  0.0214, -0.0585, -0.2741, -0.5369,
          0.1368, -0.1335]], grad_fn=<AddmmBackward0>)

## 在正向传播过程中执行代码

In [17]:
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a constant weight with Python control flow.

    The forward pass applies one shared ``Linear(20, 20)`` layer twice, with a
    constant (never trained) random 20x20 weight in between, then halves the
    result until the sum of its absolute values is at most 1 and returns the
    scalar sum.
    """

    def __init__(self):
        """Create the constant random weight and the shared linear layer."""
        super().__init__()
        # Constant weight: a buffer is not a parameter, so it receives no
        # gradient updates. Registering it (instead of storing a plain tensor
        # attribute) makes it follow .to(device) / .double() / .cuda() together
        # with the rest of the module. persistent=False keeps it out of
        # state_dict(), so the saved keys are the same as with a plain attribute.
        self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from ``X`` as described on the class."""
        X = self.linear(X)
        # Multiply by the constant weight and add 1 before the ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same layer: both applications share one set of parameters.
        X = self.linear(X)
        # Ordinary Python control flow is allowed inside forward.
        while X.abs().sum() > 1:
            X = X / 2
        return X.sum()
# Instantiate FixedHiddenMLP (this rebinds the global name `net`) and run it on
# the batch X from an earlier cell; forward returns a scalar tensor (X.sum()).
net = FixedHiddenMLP()
net(X)

tensor(-0.0631, grad_fn=<SumBackward0>)

## 混合搭配各种组合块的方法

In [19]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` sub-network inside a custom module.

    Architecture: Linear(20 -> 64), ReLU, Linear(64 -> 32), ReLU, followed by
    a final Linear(32 -> 16).
    """

    def __init__(self):
        """Create the nested sequential sub-network and the output layer."""
        super().__init__()
        feature_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*feature_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested sub-network, then the output layer."""
        features = self.net(X)
        return self.linear(features)
# Instantiate the nested block and run it on the batch X from an earlier cell.
chimera = NestMLP()
chimera(X)

tensor([[-0.1077,  0.3021, -0.2206,  0.0546, -0.0242, -0.0605,  0.0357, -0.0237,
         -0.1163,  0.0116,  0.3313, -0.0357, -0.0277,  0.0314,  0.0106,  0.0848],
        [-0.1348,  0.1672, -0.1882,  0.0621,  0.1433, -0.1572,  0.0756, -0.0304,
         -0.1552,  0.0672,  0.1254, -0.1044, -0.1078, -0.0255,  0.0338,  0.0593]],
       grad_fn=<AddmmBackward0>)