In [1]:
import torch
from torch import nn
# 模型构造

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer: 784 -> 256 -> ReLU -> 10.

    Any keyword arguments are passed unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, **kwargs):
        # Run the parent constructor first so the layers assigned below are
        # tracked as submodules of this model.
        super().__init__(**kwargs)
        self.hidden = nn.Linear(in_features=784, out_features=256)  # hidden layer
        self.act = nn.ReLU()
        self.output = nn.Linear(in_features=256, out_features=10)  # output layer

    def forward(self, x):
        """Forward computation: map the input ``x`` to the output-layer result."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        return self.output(activated)

In [4]:
# Random batch of 2 samples with 784 features each, matching MLP's first Linear layer.
X = torch.rand(2, 784)
net = MLP()
# Calling the module runs its forward computation; the cell displays the result.
net(X)

tensor([[ 0.0754,  0.0509,  0.1266,  0.1716, -0.0157,  0.0247,  0.0720, -0.2500,
          0.0772,  0.0935],
        [ 0.1211,  0.2451,  0.1236,  0.1290, -0.1524, -0.0018, -0.0401, -0.2584,
          0.0728,  0.0595]], grad_fn=<AddmmBackward>)

In [5]:
# Module的子类
# Sequential、ModuleList和ModuleDict

In [7]:
# Sequential类
# 实现一个与Sequential类有相同功能的MySequential类。
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: either a single ``OrderedDict`` mapping names to modules, or any
            number of modules, which are registered under the names
            ``"0"``, ``"1"``, ... in the order given.
    """

    def __init__(self, *args):
        # Imported here rather than in the class body: names bound in a class
        # body are not visible from inside its methods, so with the class-level
        # import the isinstance check below raised NameError whenever exactly
        # one argument was passed (unless a global OrderedDict happened to exist).
        from collections import OrderedDict

        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            # A single OrderedDict was passed: keep the caller's names.
            for key, module in args[0].items():
                # add_module stores the module in self._modules (an ordered mapping)
                self.add_module(key, module)
        else:
            # Modules were passed positionally: name each one after its index.
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)

    def forward(self, input):
        """Feed ``input`` through every registered module in insertion order."""
        # `input` shadows the builtin, but the parameter name is kept so that
        # callers passing it by keyword keep working.
        for module in self._modules.values():
            input = module(input)
        return input

In [8]:
# Same 784 -> 256 -> ReLU -> 10 stack as MLP, built from positional modules.
net = MySequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)

In [9]:
# Forward pass through the MySequential network using the (2, 784) batch X created earlier.
net(X)

tensor([[-0.1446,  0.0102,  0.1610, -0.0284,  0.0165,  0.1348, -0.1432,  0.1490,
          0.0357,  0.0771],
        [-0.0993, -0.0519,  0.2163,  0.0356,  0.0914,  0.2410, -0.1299,  0.0003,
         -0.0288,  0.1145]], grad_fn=<AddmmBackward>)

In [10]:
# ModuleList class
# ModuleList takes a list of submodules as input and also supports list-like append and extend operations.
net = nn.ModuleList([nn.Linear(784,256), nn.ReLU()])
net.append(nn.Linear(256, 10)) # works like list.append
net[-1]

Linear(in_features=256, out_features=10, bias=True)

In [11]:
# Display the ModuleList, which holds three modules after the append above.
net

ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)

In [12]:
# ModuleList仅仅是一个储存各种模块的列表，这些模块之间没有联系，也没有规定执行顺序（所以不用保证相邻层的输入输出维度匹配），而且没有实现forward功能，需要自己实现，所以对上面的net执行net(torch.zeros(1, 784))会报NotImplementedError；
# 而Sequential内的模块需要按照顺序排列，要保证相邻层的输入输出大小相匹配，内部forward功能已经实现。
class MyModule(nn.Module):
    """Ten ``Linear(10, 10)`` layers kept in a ``ModuleList`` with a hand-written forward."""

    def __init__(self):
        super().__init__()
        self.linears = nn.ModuleList()
        for _ in range(10):
            self.linears.append(nn.Linear(10, 10))

    def forward(self, x):
        """At step ``idx`` replace ``x`` by ``linears[idx // 2](x) + linears[idx](x)``."""
        # A ModuleList supports both integer indexing and iteration; indexing
        # is used for both operands here.
        for idx in range(len(self.linears)):
            paired_layer = self.linears[idx // 2]
            current_layer = self.linears[idx]
            x = paired_layer(x) + current_layer(x)
        return x

In [13]:
# ModuleList不同于一般的Python的list，加入到ModuleList里面的所有模块的参数会被自动添加到整个网络中
class Module_ModuleList(nn.Module):
    """Holds one ``Linear(10, 10)`` layer inside an ``nn.ModuleList``."""

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(10, 10)]
        # Wrapping the list in a ModuleList makes the layer part of this module.
        self.linears = nn.ModuleList(layers)

class Module_List(nn.Module):
    """Counter-example: holds its ``Linear(10, 10)`` layer in a plain Python list."""

    def __init__(self):
        super().__init__()
        layer = nn.Linear(10, 10)
        # Deliberately a plain list rather than an nn.ModuleList, for contrast
        # with Module_ModuleList.
        self.linears = [layer]

In [14]:
# The repr lists the Linear layer held in the ModuleList.
Module_ModuleList()

Module_ModuleList(
  (linears): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
  )
)

In [16]:
# The repr shows no submodules: the Linear layer kept in the plain list is not listed.
Module_List()

Module_List()

In [17]:
# ModuleDict takes a dictionary of submodules as input and supports dict-like insertion and lookup.
# Like ModuleList, a ModuleDict instance is only a dictionary that stores modules; it defines no forward function, so you must write one yourself.
# Likewise, ModuleDict differs from a Python dict: the parameters of every module it holds are automatically added to the whole network.
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
# Add another entry with dict-style assignment.
net['output'] = nn.Linear(256, 10)

In [18]:
# Dictionary-style access by key.
net['linear']

Linear(in_features=784, out_features=256, bias=True)

In [19]:
# Attribute-style access to the module stored under the 'output' key.
net.output

Linear(in_features=256, out_features=10, bias=True)

In [20]:
# Display all entries of the ModuleDict.
net

ModuleDict(
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [21]:
# 构造复杂的模型
class FancyMLP(nn.Module):
    """MLP that demonstrates a constant weight, layer reuse and Python control flow.

    Expects 2-D input with 20 features per row (required by ``Linear(20, 20)``
    and ``torch.mm``) and returns a scalar tensor.

    Any keyword arguments are passed unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        # Non-trainable constant weight. It is registered as a buffer instead of
        # being stored as a plain tensor attribute so that it is saved in
        # state_dict() and follows the module through .to() / .cuda() / .double().
        # A plain attribute would be re-drawn at random on reload and left
        # behind on the original device.
        self.register_buffer('rand_weight', torch.rand((20, 20), requires_grad=False))
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return the sum of the rescaled activations as a scalar tensor."""
        x = self.linear(x)
        # Use the constant weight with nn.functional.relu and torch.mm. The
        # buffer does not require grad, so no `.data` access is needed.
        x = nn.functional.relu(torch.mm(x, self.rand_weight) + 1)

        # Reuse the same fully connected layer, i.e. two layers sharing parameters.
        x = self.linear(x)
        # Control flow: item() turns the norm into a Python scalar for comparison.
        while x.norm().item() > 1:
            x /= 2
        if x.norm().item() < 0.8:
            x *= 10
        return x.sum()

In [22]:
# Random batch of 2 samples with 20 features each, matching FancyMLP's Linear(20, 20) layer.
X = torch.rand(2, 20)
net = FancyMLP()

In [23]:
# FancyMLP.forward returns x.sum(), so the displayed result is a scalar tensor.
net(X)

tensor(-4.3890, grad_fn=<SumBackward0>)

In [24]:
# FancyMLP和Sequential类都是Module类的子类，所以我们可以嵌套调用它们
class NestMLP(nn.Module):
    """Wraps an inner ``nn.Sequential`` made of ``Linear(40, 30)`` followed by ``ReLU``.

    Any keyword arguments are passed unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        projection = nn.Linear(40, 30)
        activation = nn.ReLU()
        self.net = nn.Sequential(projection, activation)

    def forward(self, x):
        """Delegate the forward computation to the inner Sequential."""
        result = self.net(x)
        return result

# Chain NestMLP (40 -> 30 features), a Linear layer (30 -> 20) and FancyMLP (20 features in, scalar out).
net = nn.Sequential(NestMLP(), nn.Linear(30, 20), FancyMLP())

In [25]:
# Random batch of 2 samples with 40 features each, matching NestMLP's Linear(40, 30) input layer.
X =  torch.rand(2, 40)
net(X)

tensor(2.0292, grad_fn=<SumBackward0>)

In [26]:
# Display the nested module hierarchy.
net

Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ReLU()
    )
  )
  (1): Linear(in_features=30, out_features=20, bias=True)
  (2): FancyMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)