## 继承`Module`构造模型

`Module`是`nn`模块提供的一个模型构造类，是所有神经网络结构的基类，可以继承它来自定义我们想要的模型。 通常需要实现`__init__`和`forward`两个函数。

在定义的类中无须自定义实现反向传播，系统将通过自动求梯度而自动生成反向传播的`backward`函数。


In [2]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer.

    Maps 784 input features to 256 hidden units, applies ReLU, then
    projects to 10 outputs. Only ``forward`` is written by hand; the
    backward pass is derived by autograd.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Submodules are created in the order hidden -> act -> output,
        # which is also the order they are listed when the model is printed.
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return the 10 output values for each row of ``x``."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        return self.output(activated)

# Random batch of 2 samples with 784 features each, matching MLP's first layer.
x = torch.rand(2,784)
net = MLP()
# Printing a module lists its registered submodules.
print(net)
# Calling the module runs its forward pass; the resulting tensor is shown as the cell output.
net(x)

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.0880,  0.0951, -0.0702, -0.0717, -0.0217,  0.2864,  0.0481,  0.0537,
          0.1295, -0.0825],
        [ 0.3202,  0.2092, -0.0238, -0.0530,  0.0234,  0.2822,  0.1265, -0.1255,
          0.0528, -0.0278]], grad_fn=<AddmmBackward>)

## `Module`的子类

### `Sequential`
`Sequential`可以接收一个子模块的有序字典(OrderedDict)或者一系列子模块作为参数来逐一添加`Module`的实例，而模型的前向计算就是将这些实例按照添加顺序逐一计算。


In [4]:
class MySequential(nn.Module):
    """Minimal re-implementation of a sequential container.

    Accepts either a single ``OrderedDict`` mapping names to modules, or any
    number of modules as positional arguments (which are then named "0",
    "1", ... by position). ``forward`` feeds the input through the modules in
    the order they were added.
    """

    def __init__(self, *args):
        # Import at function scope: a class-body import would only create a
        # class attribute, which is NOT visible as a bare name inside methods
        # and would raise NameError in the isinstance check below.
        from collections import OrderedDict

        super(MySequential, self).__init__()

        if len(args) == 1 and isinstance(args[0], OrderedDict):
            # The dict is the first positional argument; ``args`` itself is a tuple.
            for key, module in args[0].items():
                # add_module stores the module in self._modules (an ordered mapping).
                self.add_module(key, module)
        else:
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)

    def forward(self, input):
        """Apply every registered module in insertion order and return the result."""
        # self._modules preserves insertion order, so iteration follows the
        # order in which the modules were added.
        for module in self._modules.values():
            input = module(input)

        return input

# Same 784 -> 256 -> 10 architecture as MLP, built from positional modules.
net = MySequential(nn.Linear(784,256),nn.ReLU(),nn.Linear(256,10))
print(net)
# x is the (2, 784) batch created in the earlier cell.
net(x)

MySequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.1194,  0.0164,  0.1449, -0.0977, -0.1579,  0.0917,  0.0084, -0.1466,
         -0.1539,  0.0569],
        [ 0.1211,  0.0190,  0.1228,  0.0579, -0.0767,  0.0438,  0.0250, -0.0359,
         -0.1090, -0.0024]], grad_fn=<AddmmBackward>)

### `ModuleList`

`ModuleList`接收一个子模块的列表作为输入，可以像`list`那样进行索引以及`append`和`extend`操作。


In [5]:
# Build a ModuleList from a Python list of modules, then append one more layer.
net = nn.ModuleList([nn.Linear(784,256),nn.ReLU()])
net.append(nn.Linear(256,10))
# List-style indexing: -1 selects the layer that was just appended.
print(net[-1])
print(net)

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


### `ModuleDict`

`ModuleDict`接收一个子模块的字典作为输入，可以像字典那样进行添加和访问操作。

In [6]:
# Build a ModuleDict from a plain dict, then add another entry by key.
net = nn.ModuleDict({'linear':nn.Linear(784,256),'act':nn.ReLU()})
net['output'] = nn.Linear(256,10)

# Entries can be read by key ...
print(net['linear'])
# ... or as attributes.
print(net.output)
print(net)

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (act): ReLU()
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)


In [None]:
## 复杂的模型
使用`Sequential`、`ModuleList`和`ModuleDict`可以构建一些简单的模型。其中`Sequential`自带`forward`，不需要自己定义；`ModuleList`和`ModuleDict`只是存放子模块的容器，本身并没有实现`forward`。而直接继承`Module`可以更灵活地扩展模型。

In [7]:
class FancyMLP(nn.Module):
    """MLP showing a constant weight, parameter sharing and Python control flow.

    The forward pass applies one linear layer twice (so both applications
    share the same parameters), multiplies by a fixed random matrix in
    between, rescales the activations with data-dependent control flow, and
    returns their sum as a scalar tensor.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        # Constant, non-trainable weight. Registering it as a buffer makes it
        # follow the module through .to()/.double()/.cuda(); as a plain tensor
        # attribute it would be left behind and torch.mm would fail on a
        # dtype/device mismatch. persistent=False keeps it out of state_dict,
        # so the checkpoint keys are the same as before.
        self.register_buffer(
            "rand_weight",
            torch.rand((20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar tensor computed from ``x`` (20 features per row)."""
        x = self.linear(x)

        # The buffer does not require grad, so no gradient flows into it.
        x = nn.functional.relu(torch.mm(x, self.rand_weight) + 1)

        # Reuse the same linear layer: equivalent to two layers sharing parameters.
        x = self.linear(x)

        # Control flow: halve until the norm is at most 1, then scale up by 10
        # if the norm ended below 0.8.
        while x.norm().item() > 1:
            x /= 2
        if x.norm().item() < 0.8:
            x *= 10

        return x.sum()

# Random batch of 2 samples with 20 features each, matching FancyMLP's linear layer.
x = torch.rand(2,20)
net = FancyMLP()
# Only the linear layer appears in the printout; rand_weight is not a submodule.
print(net)
# The forward pass ends in x.sum(), so the displayed result is a scalar tensor.
net(x)

FancyMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)


tensor(-0.4809, grad_fn=<SumBackward0>)

In [8]:
class NestMLP(nn.Module):
    """Module that wraps a small ``nn.Sequential`` (Linear 40 -> 30, then ReLU).

    Shows that a custom ``Module`` can hold a container module and can itself
    be nested inside another container.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        layers = [nn.Linear(40, 30), nn.ReLU()]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the wrapped sequential block."""
        hidden = self.net(x)
        return hidden

# Chain the custom modules: NestMLP (40 -> 30), a linear layer (30 -> 20),
# then FancyMLP, which takes 20 features and returns a scalar.
net = nn.Sequential(NestMLP(),nn.Linear(30,20),FancyMLP())

# Random batch of 2 samples with 40 features each, matching NestMLP's input size.
x = torch.rand(2,40)
print(x)
net(x)

tensor([[0.5943, 0.0439, 0.7584, 0.2527, 0.0257, 0.1521, 0.6511, 0.6634, 0.1116,
         0.8765, 0.8666, 0.0375, 0.6985, 0.9923, 0.6121, 0.1922, 0.7950, 0.6947,
         0.0807, 0.8435, 0.3812, 0.4148, 0.1212, 0.7269, 0.0686, 0.1769, 0.7371,
         0.5583, 0.7601, 0.0224, 0.0012, 0.9065, 0.1495, 0.7758, 0.7068, 0.8274,
         0.8461, 0.0790, 0.3010, 0.7584],
        [0.3737, 0.8310, 0.8847, 0.5191, 0.9121, 0.3877, 0.5499, 0.6093, 0.5470,
         0.4420, 0.6045, 0.6672, 0.4715, 0.8269, 0.4019, 0.6695, 0.0603, 0.0871,
         0.3629, 0.9018, 0.3537, 0.7331, 0.1684, 0.0983, 0.3853, 0.9679, 0.6830,
         0.7771, 0.9861, 0.1997, 0.1705, 0.2317, 0.3741, 0.8318, 0.2112, 0.5526,
         0.4948, 0.4162, 0.3590, 0.4691]])


tensor(6.7878, grad_fn=<SumBackward0>)