In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
net = nn.Sequential(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))

X = torch.rand(2, 20)
net(X).shape



torch.Size([2, 10])

Custom Modules

Perhaps the easiest way to develop intuition about how a module works is to implement one ourselves. Before we implement our own custom module, we briefly summarize the basic functionality that each module must provide:

1. Ingest input data as arguments to its forward propagation method.

2. Generate an output by having the forward propagation method return a value. Note that the output may have a different shape from the input. For example, the first fully connected layer in our model above ingests an input of arbitrary dimension but returns an output of dimension 256.

3. Calculate the gradient of its output with respect to its input, which can be accessed via its backpropagation method. Typically this happens automatically.

4. Store and provide access to those parameters necessary to execute the forward propagation computation.

5. Initialize model parameters as needed.

In [1]:
class MLP(nn.Module):
    def __init__(self):
        # Call the constructor of the parent class nn.Module to perform
        # the necessary initialization
        super().__init__()
        self.hidden = nn.LazyLinear(256)
        self.out = nn.LazyLinear(10)

    # Define the forward propagation of the model, that is, how to return the
    # required model output based on the input X
    def forward(self, X):
        return self.out(F.relu(self.hidden(X)))

NameError: name 'nn' is not defined

In [8]:
net = MLP()
net(X).shape




<bound method Module._call_impl of MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)>

In [46]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            self.add_module(str(idx), module)

    def forward(self, X):
        for module in self.children():
            X = module(X)
        return X

In [53]:
net = MySequential(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))
net(X)
for param in net.parameters():
    print(param)


Parameter containing:
tensor([[-0.1122,  0.0859, -0.2232,  ..., -0.0600, -0.0186,  0.1708],
        [ 0.0847, -0.2044,  0.0703,  ...,  0.0160,  0.1959, -0.0731],
        [ 0.0519, -0.0074, -0.2062,  ...,  0.1979,  0.1533, -0.1174],
        ...,
        [ 0.0142,  0.1063,  0.1867,  ...,  0.2196,  0.2173,  0.0346],
        [-0.0436,  0.0210,  0.0175,  ...,  0.1043,  0.1294, -0.0509],
        [-0.2066,  0.0915,  0.1587,  ...,  0.0182,  0.1433,  0.1089]],
       requires_grad=True)
Parameter containing:
tensor([-0.0744, -0.0752, -0.0932,  0.0568, -0.0046,  0.1442, -0.0900,  0.1181,
        -0.0853,  0.0547, -0.1647, -0.0904,  0.1134,  0.1859, -0.1241,  0.1729,
        -0.0707,  0.1233,  0.1122, -0.1999,  0.0577, -0.0870,  0.0948, -0.0215,
         0.0444, -0.2002,  0.1599, -0.1995, -0.2103,  0.0156, -0.0781, -0.1529,
         0.0795,  0.1441,  0.1189, -0.1591, -0.1069, -0.1543,  0.0878,  0.0084,
        -0.0670,  0.2094, -0.2191,  0.0397, -0.1138, -0.0639,  0.0694,  0.2042,
         0.0322

In [16]:
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # Random weight parameters that will not compute gradients and
        # therefore keep constant during training
        self.rand_weight = torch.rand((20, 20))
        self.linear = nn.LazyLinear(20)

    def forward(self, X):
        X = self.linear(X)
        X = F.relu(X @ self.rand_weight + 1)
        # Reuse the fully connected layer. This is equivalent to sharing
        # parameters with two fully connected layers
        X = self.linear(X)
        # Control flow
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [21]:
net = FixedHiddenMLP()
net(X)

tensor(0.2227, grad_fn=<SumBackward0>)

We can mix and match various ways of assembling modules together. In the following example, we nest modules in some creative ways.

In [22]:
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.LazyLinear(64), nn.ReLU(),
                                 nn.LazyLinear(32), nn.ReLU())
        self.linear = nn.LazyLinear(16)

    def forward(self, X):
        return self.linear(self.net(X))

chimera = nn.Sequential(NestMLP(), nn.LazyLinear(20), FixedHiddenMLP())
chimera(X)

tensor(0.1063, grad_fn=<SumBackward0>)

Exercises

1. What kinds of problems will occur if you change MySequential to store modules in a Python list?

In [49]:
class MySequentialEx(nn.Module):
    def __init__(self, *args):
        super().__init__()
        self.net = [module for module in args]

    def forward(self, X):
        for module in self.net:
            X = module(X)
        return X

In [50]:
net = MySequentialEx(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))
net(X)

tensor([[-0.1412, -0.0165,  0.0156, -0.0573,  0.0665, -0.0294, -0.2216,  0.0310,
          0.1866,  0.0849],
        [-0.2358, -0.1653,  0.0419, -0.0169,  0.0521, -0.0684, -0.3120,  0.0475,
         -0.0205,  0.1414]], grad_fn=<AddmmBackward0>)

In [51]:
for param in net.parameters():
    print(param)
# Notice there are no parameters

2. Implement a module that takes two modules as an argument, say net1 and net2 and returns the concatenated output of both networks in the forward propagation. This is also called a parallel module.

In [None]:
class ParallelModule(nn.Module):
    def __init__(self, net1, net2):
        super().__init__()
        self.net1 = net1
        self.net2 = net2
    
    def forward(self, X):
        return nn.cat(self.net1(X), self.net2(X))

3. Assume that you want to concatenate multiple instances of the same network. Implement a factory function that generates multiple instances of the same module and build a larger network from it.

In [65]:
class Factory(nn.Module):
    def __init__(self, module, k):
        super().__init__()
        self.net = nn.Sequential()
        for i in range(k):
            nn.Sequential.add_module(str(i), module)

    def forward(self, X):
        return self.net(X)

In [63]:
net = Factory(nn.LazyLinear(10),2)

TypeError: Module.add_module() missing 1 required positional argument: 'module'