In [1]:
import torch
from torch import nn 
from torch.nn import functional as F

In [2]:
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
X = torch.rand(2, 20)
print(X)
print(net(X))

tensor([[0.2122, 0.8667, 0.8356, 0.6748, 0.6246, 0.8330, 0.2675, 0.4658, 0.1512,
         0.8788, 0.4592, 0.2528, 0.6656, 0.5745, 0.2487, 0.2348, 0.5549, 0.9600,
         0.6651, 0.7450],
        [0.9563, 0.1961, 0.7930, 0.9803, 0.7252, 0.2000, 0.9701, 0.3231, 0.4223,
         0.0313, 0.1668, 0.3774, 0.2612, 0.7354, 0.1823, 0.9574, 0.7035, 0.8729,
         0.3655, 0.4038]])
tensor([[ 0.0499, -0.4434, -0.0598,  0.1889,  0.0267,  0.2237,  0.1635, -0.1801,
         -0.2194,  0.1078],
        [ 0.0097, -0.3750, -0.0216,  0.1647,  0.0469,  0.2779,  0.1025, -0.1826,
         -0.2476,  0.1931]], grad_fn=<AddmmBackward0>)


In [3]:
class MLP(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.hidden = nn.Linear(20,256)
        self.out = nn.Linear(256,10)    
    def forward(self,X):
        return self.out(F.relu(self.hidden(X)))

In [4]:
net = MLP()#实例化MLPmodule
net(X)

tensor([[ 0.2480,  0.0422,  0.1888, -0.1539, -0.0480,  0.1858, -0.0886,  0.0476,
          0.0929, -0.1988],
        [ 0.3118, -0.0707,  0.2045, -0.2183, -0.0116,  0.0947, -0.0521,  0.0422,
         -0.0267, -0.1839]], grad_fn=<AddmmBackward0>)

In [5]:
#nn.sequential类模仿构造
class MySequential(nn.Module):
    def __init__(self,*args) -> None:
        super().__init__()
        for block in args:
            self._modules[block]=block#_modul是一个有序字典字典,可以用[]方式加入新键值对
    def forward(self,X):
        for block in self._modules.values():
            X=block(X)
        return X

net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[-0.1862,  0.0073, -0.1145,  0.2264, -0.1626,  0.0995, -0.1073, -0.0213,
          0.0557, -0.0404],
        [-0.2116,  0.0516, -0.0081,  0.3255,  0.0442,  0.0327, -0.0600, -0.0513,
         -0.0088,  0.0199]], grad_fn=<AddmmBackward0>)

## 下面是实现$ f(\boldsymbol{x}, \boldsymbol{w})=c \cdot \boldsymbol{w}^{\top} \boldsymbol{x} $ 的类class

In [12]:
class FixedHiddenMLP(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        #不计算梯度的随机权重参数，因此其在训练期间也保持不变
        self.rand_weight = torch.rand((20,20),requires_grad=False)
        self.linear = nn.Linear(20,20)
    
    def forward(self,X):
        X = self.linear(X)
        # 使用创建的常量参数以及relu和mm函数
        X = F.relu(torch.mm(X,self.rand_weight)+1)
        # 复用全连接层相当于两个全连接层共享参数
        X = self.linear(X)
        # 控制流
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [13]:
net = FixedHiddenMLP()
print(X)
print(net(X))

tensor([[0.2122, 0.8667, 0.8356, 0.6748, 0.6246, 0.8330, 0.2675, 0.4658, 0.1512,
         0.8788, 0.4592, 0.2528, 0.6656, 0.5745, 0.2487, 0.2348, 0.5549, 0.9600,
         0.6651, 0.7450],
        [0.9563, 0.1961, 0.7930, 0.9803, 0.7252, 0.2000, 0.9701, 0.3231, 0.4223,
         0.0313, 0.1668, 0.3774, 0.2612, 0.7354, 0.1823, 0.9574, 0.7035, 0.8729,
         0.3655, 0.4038]])
tensor(0.0028, grad_fn=<SumBackward0>)


In [19]:
class NestMLP(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.net = nn.Sequential(nn.Linear(10,24),nn.ReLU(),nn.Linear(64,32),nn.ReLU())
        self.linear = nn.Linear(32,16)
    def forward(self,X):
        return self.linear(self.net(X))

chimera = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
print(X)
chimera(X)

tensor([[0.2122, 0.8667, 0.8356, 0.6748, 0.6246, 0.8330, 0.2675, 0.4658, 0.1512,
         0.8788, 0.4592, 0.2528, 0.6656, 0.5745, 0.2487, 0.2348, 0.5549, 0.9600,
         0.6651, 0.7450],
        [0.9563, 0.1961, 0.7930, 0.9803, 0.7252, 0.2000, 0.9701, 0.3231, 0.4223,
         0.0313, 0.1668, 0.3774, 0.2612, 0.7354, 0.1823, 0.9574, 0.7035, 0.8729,
         0.3655, 0.4038]])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x20 and 10x24)