In [1]:
import torch 
from torch import nn
from torch.nn import functional as F

In [2]:
# Creating MLP with one hidden layer with 256 hidden units and 10 output units

class MLP(nn.Module):
    """Multilayer perceptron with a single ReLU hidden layer.

    Both layers are ``nn.LazyLinear``, so the number of input features is
    inferred from the first batch that is passed through the network.

    Args:
        num_hiddens: Number of units in the hidden layer. Defaults to 256.
        num_outputs: Number of units in the output layer. Defaults to 10.
    """

    def __init__(self, num_hiddens=256, num_outputs=10):
        super().__init__()
        self.hidden = nn.LazyLinear(num_hiddens)
        self.out = nn.LazyLinear(num_outputs)

    def forward(self, X):
        """Apply hidden layer -> ReLU -> output layer and return the result."""
        return self.out(F.relu(self.hidden(X)))
    
# Instantiate the MLP defined above; the name `net` is rebound by later cells.
net = MLP()



In [4]:
# Constructing our own class with the same functionality as the default Sequential class

class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The modules passed to the constructor are registered as children under
    the names "0", "1", ... and are applied to the input in that order.

    Args:
        *args: The modules to chain, in execution order.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # add_module registers the child under the key str(idx) in the
            # module's ordered `_modules` dict (not a list), which is what
            # makes its parameters visible to parameters()/state_dict().
            self.add_module(str(idx), module)

    def forward(self, X):
        """Feed ``X`` through every registered module in order and return the result."""
        # Iterate over the registered slots instead of self.children():
        # children() yields each distinct module object only once, so a layer
        # instance that was passed in several times would be applied only once.
        for module in self._modules.values():
            if module is not None:  # add_module accepts None placeholders
                X = module(X)
        return X
    
# Same layer stack as the MLP class (LazyLinear(256) -> ReLU -> LazyLinear(10)),
# assembled from individual layers with the custom container.
net = MySequential(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))



Suppose we want to compute $f(\mathbf{x},\mathbf{w}) = c \cdot \mathbf{w}^\top \mathbf{x}$,
where $\mathbf{x}$ is the input, $\mathbf{w}$ is our parameter,
and $c$ is some specified constant
that is not updated during optimization.

In [6]:
# Executing code in the forward propagation method

class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a trainable layer, constant weights and
    plain Python control flow.

    ``self.linear`` is applied twice, so both applications share the same
    parameters. ``rand_weight`` is a constant 20x20 matrix that is never
    updated by an optimizer.
    """

    def __init__(self):
        super().__init__()
        # Constant (non-trainable) weights. Registering them as a buffer makes
        # them follow the module through .to()/.cuda()/.double(), which a
        # plain tensor attribute does not. persistent=False keeps them out of
        # state_dict(), so the saved keys are the same as with a plain attribute.
        self.register_buffer("rand_weight", torch.rand((20, 20)),
                             persistent=False)
        self.linear = nn.LazyLinear(20)

    def forward(self, X):
        """Return a scalar tensor: the sum of the rescaled network output."""
        X = self.linear(X)
        # The constant weights enter the computation like any other tensor
        # but receive no gradient updates.
        X = F.relu(X @ self.rand_weight + 1)
        # Reuse the same fully connected layer (shared parameters).
        X = self.linear(X)
        # Arbitrary control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
    
# Instantiate the model; this rebinds `net` from the previous cell's network.
net = FixedHiddenMLP()



In [9]:
# MIX-AND-MATCH various ways of assembling modules together

class NestMLP(nn.Module):
    """Module that nests an ``nn.Sequential`` feature stack inside a custom class.

    ``self.net`` is LazyLinear(64) -> ReLU -> LazyLinear(32) -> ReLU and
    ``self.linear`` maps its output to 16 units.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.LazyLinear(64), nn.ReLU(),
            nn.LazyLinear(32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*feature_layers)
        self.linear = nn.LazyLinear(16)

    def forward(self, x):
        """Run ``x`` through the nested stack, then through the final linear layer."""
        hidden = self.net(x)
        return self.linear(hidden)
    
# Custom modules and built-in layers can be nested freely inside a standard
# nn.Sequential container.
chimera = nn.Sequential(NestMLP(), nn.LazyLinear(20), FixedHiddenMLP())
# Random batch of 2 examples with 20 features each. No seed is set in the
# visible cells, so the displayed value presumably differs from run to run.
X = torch.rand(2, 20)
chimera(X)

tensor(-0.2280, grad_fn=<SumBackward0>)

In [10]:
# Individual Layers can be modules
# Many Layers can comprise a module
# Many modules can comprise a module