首先，回忆一下多层感知机

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

In [None]:
# Two-layer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
  nn.Linear(20, 256),
  nn.ReLU(),
  nn.Linear(256, 10),
)
# Mini-batch of 2 random samples with 20 features each.
X = torch.rand(2, 20)
net(X)

tensor([[-0.0551, -0.2536,  0.0096, -0.0515, -0.0462,  0.1455, -0.1643, -0.0903,
          0.3246, -0.0154],
        [ 0.1044, -0.0854,  0.1892, -0.0471, -0.0281,  0.1852, -0.0816, -0.1357,
          0.2563,  0.0503]], grad_fn=<AddmmBackward>)

自定义块

In [None]:
# Same model as the nn.Sequential version above, written as a custom block.
class MLP(nn.Module):
  """Multilayer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

  def __init__(self):
    """Create the hidden and output layers."""
    super().__init__()
    self.hidden = nn.Linear(20, 256)  # hidden layer
    self.out = nn.Linear(256, 10)  # output layer

  def forward(self, X):
    """Return the output-layer values computed from input batch ``X``."""
    hidden_activation = F.relu(self.hidden(X))
    return self.out(hidden_activation)

实例化多层感知机的层，然后在每次调用正向传播函数时调用这些层

In [None]:
# Build the custom block, then call it like a function to run forward().
net = MLP()
net(X)

tensor([[-0.0069,  0.3055,  0.1357,  0.0674, -0.2480, -0.1679,  0.1056,  0.0578,
          0.1124, -0.0241],
        [-0.0706,  0.1582,  0.1425, -0.1122, -0.1492, -0.0686,  0.0096,  0.0643,
         -0.1240,  0.0565]], grad_fn=<AddmmBackward>)

顺序块

In [None]:
class MYSequential(nn.Module):
  """Minimal re-implementation of ``nn.Sequential``: applies child blocks in order."""

  def __init__(self, *args):
    """Register every module in ``args`` as a child, in the order given."""
    super().__init__()
    for idx, block in enumerate(args):
      # Child names must be strings. Keying ``_modules`` by the module object
      # itself breaks repr(), named_modules() and state_dict(), which build
      # dotted names from these keys, and it silently collapses a module that
      # is passed more than once into a single entry.
      self.add_module(str(idx), block)

  def forward(self, X):
    """Feed ``X`` through each child block in registration order."""
    # ``_modules`` is an ordered dict, so iteration follows registration order.
    for block in self._modules.values():
      X = block(X)
    return X

# Same 20 -> 256 -> 10 architecture, assembled with the hand-written container.
net = MYSequential(
  nn.Linear(20, 256),
  nn.ReLU(),
  nn.Linear(256, 10),
)
net(X)


tensor([[-0.3686,  0.1643,  0.0918,  0.0381, -0.1996, -0.0994, -0.3390, -0.0632,
          0.0231, -0.0912],
        [-0.1383,  0.0167, -0.0256, -0.0704, -0.2475, -0.1524, -0.2265, -0.0599,
          0.0457, -0.0889]], grad_fn=<AddmmBackward>)

在正向传播中执行代码

In [None]:
class FixedHiddenMLP(nn.Module):
  """Block whose forward pass mixes a trainable layer, constant weights and Python control flow."""

  def __init__(self):
    """Create the constant random weight matrix and the shared linear layer."""
    super().__init__()
    # Constant random weights that are never trained. Registering them as a
    # buffer (instead of a plain tensor attribute) makes .to()/.cuda()/.double()
    # move or convert them together with the parameters. persistent=False
    # keeps them out of state_dict(), so its keys are the same as before.
    self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
    self.linear = nn.Linear(20, 20)

  def forward(self, X):
    """Return a scalar: the sum of the rescaled activations for batch ``X``."""
    X = self.linear(X)
    # Matrix product with the constant weights, plus 1, then ReLU.
    X = F.relu(torch.mm(X, self.rand_weight) + 1)
    # The same linear layer is applied again, so both calls share parameters.
    X = self.linear(X)
    # Halve the activations until the sum of absolute values is at most 1.
    while X.abs().sum() > 1:
      X /= 2
    return X.sum()

# Instantiate the block and run it on X; the result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(-0.0671, grad_fn=<SumBackward0>)

混合搭配各种组合块的方法

In [None]:
class NestMLP(nn.Module):
  """Block that nests an ``nn.Sequential`` stack in front of a final linear layer."""

  def __init__(self):
    """Build the nested 20 -> 64 -> 32 stack and the 32 -> 16 output layer."""
    super().__init__()
    # Each linear layer in the nested stack is followed by a ReLU.
    self.net = nn.Sequential(
      nn.Linear(20, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
    )
    self.linear = nn.Linear(32, 16)

  def forward(self, X):
    """Run ``X`` through the nested stack, then through the output layer."""
    features = self.net(X)
    return self.linear(features)
  
# Custom blocks and built-in layers can be mixed freely inside nn.Sequential.
chimera = nn.Sequential(
  NestMLP(),
  nn.Linear(16, 20),
  FixedHiddenMLP(),
)
chimera(X)


tensor(-0.0180, grad_fn=<SumBackward0>)