In [1]:
import torch
import torch.nn as nn
from d2l import torch as d2l

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer.

    Projects 20 input features onto 256 hidden units, applies ReLU, and then
    projects the hidden units onto 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # The attribute names double as the parameter prefixes in
        # state_dict() ("hidden.weight", "out.bias", ...).
        self.hidden = nn.Linear(in_features=20, out_features=256)
        self.out = nn.Linear(in_features=256, out_features=10)

    def forward(self, X):
        """Compute the 10 output values for ``X``.

        Args:
            X: Tensor whose last dimension is 20 (the hidden layer's input
                size).

        Returns:
            Tensor with the same leading dimensions as ``X`` and a last
            dimension of 10.
        """
        hidden_activations = torch.relu(self.hidden(X))
        return self.out(hidden_activations)

In [3]:
# Build the MLP and feed it a batch of 2 random samples with 20 features each;
# the final expression is what the cell displays.
net = MLP()
X = torch.rand(2, 20)
net(X)

tensor([[ 0.0146,  0.4056, -0.3558, -0.0499, -0.0082, -0.1454,  0.0972, -0.0439,
          0.1269,  0.0276],
        [ 0.0386,  0.3212, -0.2367, -0.0392, -0.0378, -0.2858,  0.1381,  0.1412,
          0.1497, -0.1868]], grad_fn=<AddmmBackward0>)

In [4]:
class MySequential(nn.Module):
    """Minimal container that chains its modules in the order they are given.

    Every positional argument is registered under its position ("0", "1",
    ...); ``forward`` then feeds the output of each module into the next one.
    """

    def __init__(self, *args):
        super().__init__()
        position = 0
        for layer in args:
            # add_module() stores the layer in self._modules, which forward()
            # walks in insertion order.
            self.add_module(str(position), layer)
            position += 1

    def forward(self, X):
        """Pass ``X`` through every registered module in turn.

        Iterates ``self._modules`` directly, so a module instance that was
        passed more than once is also applied more than once. With no
        registered modules, ``X`` is returned unchanged.
        """
        result = X
        for name in self._modules:
            result = self._modules[name](result)
        return result

# Same layer stack as MLP (20 -> 256 -> ReLU -> 10), assembled from
# individual layers and evaluated on the batch X from the earlier cell.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net(X)

tensor([[ 0.2357, -0.0756,  0.1200,  0.3289,  0.1950,  0.1338,  0.0112, -0.1456,
          0.0513,  0.0573],
        [ 0.1810, -0.0552,  0.1006,  0.2838,  0.0055,  0.1272,  0.0006,  0.0419,
         -0.1075, -0.0727]], grad_fn=<AddmmBackward0>)

In [5]:
class FixedHiddenMLP(nn.Module):
    """MLP that combines a trainable layer, constant weights and control flow.

    The block applies one shared ``nn.Linear(20, 20)`` layer twice, with a
    constant random 20x20 matrix (never updated by training) in between, then
    repeatedly halves the result until its L1 norm is at most 1 and returns
    the sum of the remaining entries.
    """

    def __init__(self):
        super().__init__()
        # Constant weights. Registering them as a buffer (instead of a plain
        # tensor attribute) makes Module.to()/.cuda()/.double() move and cast
        # them together with the parameters, so forward() keeps working after
        # the module changes device or dtype. persistent=False keeps the
        # buffer out of state_dict(), so checkpoint keys are unchanged.
        self.register_buffer("rand_weight", torch.rand((20, 20)),
                             persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from ``X``.

        Args:
            X: 2-D tensor with 20 columns (``torch.mm`` requires a matrix).

        Returns:
            0-dimensional tensor: the sum of the rescaled activations.
        """
        X = self.linear(X)
        # Constant transform: rand_weight is a buffer, so it receives no
        # gradient and is not returned by parameters().
        X = torch.relu(torch.mm(X, self.rand_weight) + 1)
        # Reusing the same layer means both applications share one set of
        # parameters.
        X = self.linear(X)
        # Data-dependent control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

# Instantiate the block and evaluate it on the batch X created in an earlier
# cell; forward() returns X.sum(), so the displayed result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(0.0774, grad_fn=<SumBackward0>)

In [6]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack in front of a linear layer.

    ``self.net`` maps 20 -> 64 -> 32 features with a ReLU after each linear
    layer; ``self.linear`` then maps those 32 features to 16 outputs.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*stack)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the final linear layer.

        Args:
            X: Tensor whose last dimension is 20.

        Returns:
            Tensor whose last dimension is 16.
        """
        features = self.net(X)
        return self.linear(features)

# Instantiate the nested block and evaluate it on the batch X from the
# earlier cell; the result has 16 values per sample.
net = NestMLP()
net(X)

tensor([[ 0.0778,  0.0304,  0.1429,  0.0131, -0.0417, -0.1951, -0.0681, -0.0887,
         -0.0601, -0.0699,  0.2536, -0.0038, -0.0947, -0.0445, -0.0098, -0.0078],
        [ 0.1070,  0.0964,  0.0848,  0.0278, -0.0189, -0.1515, -0.1297, -0.0761,
         -0.0070, -0.0355,  0.1636, -0.0127, -0.0381, -0.0118,  0.0578, -0.0271]],
       grad_fn=<AddmmBackward0>)

In [7]:
# Custom blocks compose freely with built-in layers: NestMLP emits 16
# features, the Linear layer maps 16 -> 20, and FixedHiddenMLP consumes those
# 20 features and reduces them to a scalar.
net = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
net(X)

tensor(-0.2209, grad_fn=<SumBackward0>)