# 层和块

我们先回顾一下多层感知机

`nn.Sequential`是一个特殊的`Module`，`Module`是PyTorch中一个重要的概念。

In [5]:
import torch
from torch import nn
from torch.nn import functional as F  #实现了大量的常用的函数

# Multilayer perceptron built from stock layers:
# 20 input features -> 256 hidden units -> ReLU -> 10 outputs.
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

# Random input of shape (13, 20); the last dimension matches the first layer's
# 20 input features.
X = torch.rand(13, 20)
net(X)

tensor([[-0.1475,  0.0159, -0.1177,  0.1073,  0.2203,  0.2706,  0.0914,  0.1171,
         -0.2471,  0.1031],
        [-0.1090,  0.0156, -0.0975,  0.0302,  0.2010,  0.0902, -0.0193,  0.1484,
         -0.0643,  0.1076],
        [-0.2749,  0.1184, -0.2542, -0.0799,  0.1495,  0.0940,  0.0667,  0.2203,
         -0.1603,  0.1178],
        [-0.1931, -0.0202, -0.0324,  0.0745,  0.1286,  0.2591, -0.0437,  0.1139,
         -0.1461, -0.0087],
        [-0.1196, -0.0079, -0.1701,  0.0151,  0.3508,  0.1624,  0.0555,  0.2084,
         -0.1878,  0.1419],
        [-0.1839,  0.0425, -0.1565, -0.0610,  0.1489,  0.1106,  0.0424,  0.1441,
         -0.2367,  0.1702],
        [-0.2434,  0.0356, -0.1619,  0.0172,  0.0574,  0.1731, -0.0662,  0.1566,
         -0.1461,  0.1368],
        [-0.3324,  0.0255, -0.3123, -0.0299,  0.2178,  0.2532, -0.0330,  0.2218,
         -0.1687,  0.1344],
        [-0.1630,  0.0058, -0.1298,  0.0770,  0.1091,  0.1280, -0.0814,  0.1591,
         -0.1781,  0.0422],
        [-0.2117, -

`nn.Sequential`定义了一种特殊的`Module`

自定义块

任何层或者神经网络都是`Module`的一个子类。

In [6]:
class MLP(nn.Module):
    """Multilayer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        # Assigning layers as attributes registers them as submodules.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output layer's result for ``X`` (last dimension must be 20)."""
        hidden_activations = F.relu(self.hidden(X))
        outputs = self.out(hidden_activations)
        return outputs

实例化多层感知机的层，然后在每次调用前向传播函数时调用这些层

In [7]:
net = MLP()  # instantiate the custom block defined above
net(X)  # calling the module runs its forward() on X

tensor([[ 0.1333,  0.1223, -0.0129,  0.0603, -0.0635,  0.0579,  0.0651, -0.1297,
         -0.0852, -0.0322],
        [ 0.2198,  0.1225, -0.0135, -0.0151, -0.0662,  0.1993,  0.1132, -0.1058,
         -0.0293,  0.0373],
        [ 0.0664,  0.0981, -0.0997,  0.0478, -0.0197,  0.0955,  0.0186, -0.2387,
         -0.0726,  0.1784],
        [ 0.2164,  0.1325,  0.0803,  0.1088, -0.1067,  0.1667,  0.0975, -0.1387,
         -0.1251,  0.0394],
        [ 0.2300,  0.0400, -0.0672,  0.0450,  0.0468,  0.1684,  0.1816, -0.1911,
          0.0096,  0.0969],
        [ 0.1019,  0.0661, -0.0151,  0.1244, -0.0524,  0.0364,  0.0859, -0.0744,
         -0.0422,  0.1857],
        [ 0.1487,  0.1178,  0.0352,  0.0301, -0.1610,  0.1546,  0.0483, -0.0741,
         -0.1992,  0.0781],
        [ 0.1464,  0.0480,  0.0650,  0.1156, -0.0781,  0.1089,  0.1531, -0.1400,
         -0.1937,  0.1607],
        [ 0.2629,  0.1264,  0.0543,  0.2460, -0.0579,  0.1285,  0.0740, -0.0281,
         -0.0727,  0.0352],
        [ 0.1660,  

顺序块

In [9]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Submodules are registered under the names ``"0"``, ``"1"``, ... in the
    order they are passed, and ``forward`` feeds the input through them in
    that same order.
    """

    def __init__(self, *args):
        """Register every positional argument as a submodule.

        Raises:
            TypeError: if an argument is neither an ``nn.Module`` nor ``None``
                (raised by ``add_module``).
        """
        super().__init__()
        for idx, module in enumerate(args):
            # Register through the public API rather than writing into the
            # private ``_modules`` dict: ``add_module`` validates the argument
            # type immediately, so a non-Module is rejected here instead of
            # breaking later in parameters() / to() / state_dict().
            self.add_module(str(idx), module)

    def forward(self, X):
        """Apply the registered submodules to ``X`` one after another."""
        # Walk the registry itself (not ``children()``, which de-duplicates)
        # so a module passed more than once is applied each time it appears.
        for block in self._modules.values():
            X = block(X)
        return X

# Same architecture as the nn.Sequential example, built with MySequential.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.2823,  0.1268, -0.0882, -0.0446,  0.0780,  0.3507,  0.0112,  0.2067,
         -0.1938, -0.1716],
        [-0.2472,  0.1245, -0.1337, -0.0296,  0.0358,  0.3724, -0.0504,  0.1640,
         -0.1696, -0.1739],
        [-0.1720,  0.0966, -0.0725, -0.0502,  0.1674,  0.4284, -0.1432,  0.2720,
         -0.1891, -0.1263],
        [-0.1921,  0.0479, -0.0618, -0.0994,  0.1448,  0.2566, -0.0216,  0.2942,
         -0.1117, -0.1548],
        [-0.2752,  0.2674, -0.0642, -0.0895,  0.1941,  0.3307,  0.0273,  0.2110,
         -0.2092, -0.1797],
        [-0.1497,  0.0755, -0.0728, -0.0471,  0.1507,  0.3213, -0.1075,  0.2528,
         -0.2040, -0.0992],
        [-0.2474, -0.0078, -0.1098, -0.0901,  0.2156,  0.3968, -0.1481,  0.2028,
         -0.2634, -0.1489],
        [-0.4370,  0.1675, -0.0349, -0.1193,  0.2424,  0.3380, -0.1130,  0.3659,
         -0.2329, -0.1055],
        [-0.1814, -0.0392, -0.0052,  0.0101,  0.1222,  0.2393,  0.0247,  0.2074,
         -0.2448, -0.1954],
        [-0.2742,  

在前向传播函数中执行代码

In [10]:
class FixedHiddenMLP(nn.Module):
    """Block whose forward pass mixes a trainable layer with a constant weight.

    The same ``nn.Linear(20, 20)`` layer is applied twice; in between, the
    activations are multiplied by a fixed random matrix, shifted by 1 and
    passed through ReLU. ``forward`` returns a scalar tensor.
    """

    def __init__(self):
        super().__init__()
        # Constant weight that does not take part in training. Registering it
        # as a buffer (instead of a plain tensor attribute) makes it follow the
        # module through .to() / .cuda() / .double(), so torch.mm in forward()
        # never sees mismatched devices or dtypes. persistent=False keeps it
        # out of state_dict(), so the set of checkpoint keys is unchanged.
        self.register_buffer(
            "rand_weight", torch.rand((20, 20)), persistent=False
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar computed from ``X``.

        ``X`` must be 2-D with 20 columns (``torch.mm`` only accepts matrices).
        """
        X = self.linear(X)
        # Arbitrary custom computation can be written inside forward().
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same linear layer a second time.
        X = self.linear(X)
        # Data-dependent control flow: halve X until the sum of absolute
        # values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

net = FixedHiddenMLP()
net(X)  # forward() ends with X.sum(), so the result is a scalar tensor

tensor(-0.2984, grad_fn=<SumBackward0>)

混合搭配各种组合块的方法

In [8]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack under a final linear layer.

    Layer widths: 20 -> 64 -> 32 (ReLU after each) -> 16.
    """

    def __init__(self):
        super().__init__()
        # Blocks can be mixed freely: a Sequential used as a submodule.
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)

# Custom blocks compose like built-in layers: NestMLP ends in a 16-unit layer,
# and the Linear(16, 20) maps that to the 20 features FixedHiddenMLP's
# Linear(20, 20) takes.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(-0.4822, grad_fn=<SumBackward0>)