In [1]:
#自定义层：目前如果存在一个在深度学习框架中还不存在的层。 在这些情况下，你必须构建自定义层。
import torch
import torch.nn.functional as F
from torch import nn

#构造一个没有任何参数的自定义层
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()#__init__()函数不进行任何初始化操作

    def forward(self, X):
        return X - X.mean()#对X中每一个元素减去均值

In [2]:
layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [3]:
#将自定义层作为组件合并到更复杂的模型中
net = nn.Sequential(nn.Linear(8, 128), CenteredLayer())

In [4]:
Y = net(torch.rand(4, 8))
Y.mean() #当给网络发送随机数据后，检查输出Y均值是否为0。 由于我们处理的是浮点数，因为存储精度的原因，我们仍然可能会看到一个非常小的非零数。

tensor(1.8626e-09, grad_fn=<MeanBackward0>)

In [5]:
#自定义一个带参数的层
class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))#采用随机初始化参数，并由Parameter()函数包裹起来，目的是给这个参数一个名字标识
        self.bias = nn.Parameter(torch.randn(units,))
    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

In [6]:
#实例化MyLinear类并访问其模型权重参数
linear = MyLinear(5, 3)#输入5维，输出3维的层
linear.weight #随机初始化权重参数经过Parameter()函数包裹后的输出结果，多了一个"Parameter containing"名字标识

Parameter containing:
tensor([[ 0.3891, -0.4105, -0.7189],
        [-0.8655, -0.1785, -0.0529],
        [-0.1446,  1.7960,  0.1220],
        [ 0.4567,  0.0444, -0.5521],
        [ 0.8643, -1.2224,  0.5013]], requires_grad=True)

In [7]:
#使用自定义层直接执行前向传播计算
linear(torch.rand(2, 5))

tensor([[0.0000, 1.0663, 0.0194],
        [0.0000, 0.0000, 0.7687]])

In [8]:
#使用自定义层构建模型，就像使用内置的全连接层一样使用自定义层
net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))#Sequential()包含两个自定义层
net(torch.rand(2, 64))#给模型输入，得到输出

tensor([[7.9190],
        [1.8329]])

In [9]:
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

X = torch.rand(2, 20)
net(X)

tensor([[ 0.1081, -0.0569,  0.0548, -0.1191, -0.0337, -0.0663, -0.2199,  0.0059,
          0.0316,  0.2819],
        [ 0.1398, -0.0566,  0.1458, -0.0881, -0.0742, -0.1665, -0.3488, -0.0111,
         -0.0515,  0.2052]], grad_fn=<AddmmBackward0>)

In [10]:
class MLP(nn.Module):
    # 用模型参数声明层。这里，我们声明两个全连接的层
    def __init__(self):
        # 调用MLP的父类Module的构造函数来执行必要的初始化。
        # 这样，在类实例化时也可以指定其他函数参数，例如模型参数params（稍后将介绍）
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # 隐藏层
        self.out = nn.Linear(256, 10)  # 输出层

    # 定义模型的前向传播，即如何根据输入X返回所需的模型输出
    def forward(self, X):
        # 注意，这里我们使用ReLU的函数版本，其在nn.functional模块中定义。
        return self.out(F.relu(self.hidden(X)))

In [11]:
net = MLP()
net(X)

tensor([[-0.1864,  0.1115,  0.2045,  0.1158,  0.0509, -0.0844, -0.1204, -0.1001,
          0.0161, -0.0837],
        [-0.0844,  0.0268,  0.1926,  0.1999,  0.0818, -0.0431, -0.1353, -0.1442,
          0.1123,  0.0215]], grad_fn=<AddmmBackward0>)

In [12]:
class MySequential(nn.Module):
    
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # 这里，module是Module子类的一个实例。我们把它保存在'Module'类的成员
            # 变量_modules中。module的类型是OrderedDict
            self._modules[str(idx)] = module

    def forward(self, X):
        # OrderedDict保证了按照成员添加的顺序遍历它们
        for block in self._modules.values():
            X = block(X)
        return X

In [13]:
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.0664, -0.1679,  0.0070, -0.0824, -0.0446,  0.0627,  0.1140,  0.1158,
         -0.1583, -0.0231],
        [-0.2058, -0.1821, -0.0210, -0.0889, -0.0383,  0.0144,  0.0755, -0.0767,
         -0.0565,  0.0800]], grad_fn=<AddmmBackward0>)

In [14]:
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # 不计算梯度的随机权重参数。因此其在训练期间保持不变
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        # 使用创建的常量参数以及relu和mm函数
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # 复用全连接层。这相当于两个全连接层共享参数
        X = self.linear(X)
        print(X)
        # 控制流
        while X.abs().sum() > 1:
            X /= 2
            print(X)
        return X.sum()

In [15]:
net = FixedHiddenMLP()
net(X)

tensor([[-0.1316, -0.2483, -0.3329,  0.0347, -0.1069, -0.0733, -0.0217, -0.1368,
          0.3185,  0.0397, -0.1105,  0.0146,  0.0825,  0.1140, -0.0691, -0.0911,
         -0.1115, -0.1111,  0.1636, -0.0972],
        [ 0.0080, -0.4649, -0.4898,  0.0620,  0.1144,  0.0482, -0.0028, -0.2015,
          0.1722,  0.1002,  0.0727,  0.0766, -0.0384,  0.2080, -0.0815, -0.1130,
         -0.1624, -0.0376,  0.1681, -0.1492]], grad_fn=<AddmmBackward0>)
tensor([[-0.0658, -0.1241, -0.1665,  0.0173, -0.0535, -0.0367, -0.0109, -0.0684,
          0.1593,  0.0199, -0.0553,  0.0073,  0.0413,  0.0570, -0.0346, -0.0455,
         -0.0557, -0.0555,  0.0818, -0.0486],
        [ 0.0040, -0.2324, -0.2449,  0.0310,  0.0572,  0.0241, -0.0014, -0.1007,
          0.0861,  0.0501,  0.0363,  0.0383, -0.0192,  0.1040, -0.0407, -0.0565,
         -0.0812, -0.0188,  0.0840, -0.0746]], grad_fn=<DivBackward0>)
tensor([[-0.0329, -0.0621, -0.0832,  0.0087, -0.0267, -0.0183, -0.0054, -0.0342,
          0.0796,  0.0099, -0.0276,

tensor(-0.1981, grad_fn=<SumBackward0>)

In [16]:
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),
                                 nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        return self.linear(self.net(X))

chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor([[-0.8884, -1.0308, -0.0831,  0.8758, -0.3174, -0.4683,  0.2343,  0.3336,
         -1.5240, -0.3105,  0.1103, -0.6705, -0.3887,  0.9161, -0.1999,  0.3467,
          0.3986,  0.2495, -0.3778, -0.7004],
        [-0.8986, -1.0482, -0.0859,  0.8887, -0.3268, -0.4798,  0.2363,  0.3374,
         -1.5472, -0.3220,  0.1037, -0.6756, -0.3956,  0.9269, -0.2069,  0.3607,
          0.4110,  0.2602, -0.3910, -0.7050]], grad_fn=<AddmmBackward0>)
tensor([[-0.4442, -0.5154, -0.0416,  0.4379, -0.1587, -0.2342,  0.1172,  0.1668,
         -0.7620, -0.1552,  0.0551, -0.3353, -0.1943,  0.4581, -0.0999,  0.1734,
          0.1993,  0.1247, -0.1889, -0.3502],
        [-0.4493, -0.5241, -0.0430,  0.4444, -0.1634, -0.2399,  0.1182,  0.1687,
         -0.7736, -0.1610,  0.0518, -0.3378, -0.1978,  0.4635, -0.1035,  0.1804,
          0.2055,  0.1301, -0.1955, -0.3525]], grad_fn=<DivBackward0>)
tensor([[-0.2221, -0.2577, -0.0208,  0.2190, -0.0794, -0.1171,  0.0586,  0.0834,
         -0.3810, -0.0776,  0.0276,

tensor(-0.2204, grad_fn=<SumBackward0>)