## Layers and modules 
- For complex networks, use network modules (a module can be a single layer, a group of layers, or an entire model)
- A module is a class

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [6]:
# nn.Sequential is itself a Module: it chains its children in the order given.
net = nn.Sequential(
    nn.LazyLinear(256),
    nn.ReLU(),
    nn.LazyLinear(10),
)
X = torch.rand(2, 20)  # input tensor of shape (2, 20)
net(X).shape

torch.Size([2, 10])

In [8]:
# custom module
class MLP(nn.Module):
    """Multilayer perceptron: a 256-unit hidden layer with ReLU, then 10 outputs."""

    def __init__(self):
        # The parent constructor has to run first so that the layers
        # assigned below are registered as sub-modules.
        super().__init__()
        self.hidden = nn.LazyLinear(256)
        self.out = nn.LazyLinear(10)

    def forward(self, X):
        """Return the output layer applied to the ReLU-activated hidden layer."""
        pre_activation = self.hidden(X)
        activated = F.relu(pre_activation)
        return self.out(activated)

In [11]:
# Instantiate the custom MLP and check the shape of its output for the batch X.
net = MLP()
net(X).shape

torch.Size([2, 10])

In [12]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Each positional argument is registered as a sub-module under its
    index (``"0"``, ``"1"``, ...) and applied in that order by ``forward``.
    """

    def __init__(self, *args: nn.Module) -> None:
        super().__init__()
        for idx, module in enumerate(args):
            # add_module stores the layer in self._modules (an ordered dict),
            # which also makes its parameters visible to the parent module.
            self.add_module(str(idx), module)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Feed ``X`` through every registered module in insertion order."""
        # Iterate the registry directly instead of self.children():
        # children() yields each distinct module instance only once, so a
        # layer passed twice (weight sharing) would silently be applied once.
        for module in self._modules.values():
            X = module(X)
        return X

In [14]:
# Build the network with the hand-written container and check the output shape.
net = MySequential(
    nn.LazyLinear(256),
    nn.ReLU(),
    nn.LazyLinear(10),
)
net(X).shape

torch.Size([2, 10])

In [18]:
# extra code in forward propagation, e.g. adding a constant
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a learned layer, fixed weights and control flow.

    ``forward`` applies one shared linear layer twice, with a fixed
    (non-trainable) random 20x20 matrix in between, then repeatedly halves
    the result until the sum of its absolute values is at most 1 and
    returns the sum of its entries as a 0-dim tensor.
    """

    def __init__(self):
        super().__init__()
        # Random weights that do not receive gradients and therefore stay
        # constant during training. They are registered as a buffer rather
        # than a plain attribute so that they follow the module through
        # .to()/.cuda()/.double() and are saved in state_dict().
        self.register_buffer("rand_weight", torch.rand((20, 20)))
        self.linear = nn.LazyLinear(20)

    def forward(self, X):
        """Return the scalar sum of the rescaled activations for input ``X``."""
        X = self.linear(X)
        X = F.relu(X @ self.rand_weight + 1)
        # Reuse the fully connected layer. This is equivalent to sharing
        # parameters with two fully connected layers
        X = self.linear(X)
        # Control flow: keep halving until the sum of absolute values is <= 1
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [21]:
# Run the batch X through FixedHiddenMLP; its forward returns a single summed value.
net = FixedHiddenMLP()
net(X)

tensor(-0.0072, grad_fn=<SumBackward0>)

In [23]:
class NestMLP(nn.Module):
    """Module that nests an ``nn.Sequential`` block ahead of a final linear layer."""

    def __init__(self):
        super().__init__()
        # Inner stack: 64 -> ReLU -> 32 -> ReLU, wrapped in its own Sequential.
        stack = [nn.LazyLinear(64), nn.ReLU(), nn.LazyLinear(32), nn.ReLU()]
        self.net = nn.Sequential(*stack)
        self.linear = nn.LazyLinear(16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the 16-unit linear layer."""
        features = self.net(X)
        return self.linear(features)

# Modules nest freely: chain a NestMLP, a plain layer and a FixedHiddenMLP.
chimera = nn.Sequential(
    NestMLP(),
    nn.LazyLinear(20),
    FixedHiddenMLP(),
)
chimera(X)

tensor(0.0999, grad_fn=<SumBackward0>)