<a href="https://colab.research.google.com/github/arnav39/d2el-en/blob/main/6_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 6.1 : Layers and Modules

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
net = nn.Sequential(nn.LazyLinear(256),
                    nn.ReLU(),
                    nn.LazyLinear(10))



In [11]:
X = torch.randn(2, 20)
net(X).shape

Linear(in_features=20, out_features=256, bias=True)
ReLU()
Linear(in_features=256, out_features=10, bias=True)


torch.Size([2, 10])

In [4]:
net.__call__(X).shape

torch.Size([2, 10])

In [5]:
class MLP(nn.Module):

  def __init__(self):
    super().__init__()
    self.hidden = nn.LazyLinear(256)
    self.out = nn.LazyLinear(10)

  def forward(self, X):
    return self.out(F.relu(self.hidden(X)))

In [6]:
net = MLP()
net(X).shape



torch.Size([2, 10])

In [7]:
nn.Sequential.add_module??

In [44]:
class MySequential(nn.Module):
  
  def __init__(self, *args):
    super().__init__()
    for idx, module in enumerate(args):
      self.add_module(str(idx), module)
    
  def forward(self, X):
    for module in self.children():
      print(module)
      X = module(X)
    return X

In [45]:
net = MySequential(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))
net(X).shape

LazyLinear(in_features=0, out_features=256, bias=True)
ReLU()
LazyLinear(in_features=0, out_features=10, bias=True)




torch.Size([2, 10])

In [13]:

class FixedHiddenMLP(nn.Module):

  def __init__(self):
    super().__init__()
    self.rand_weight = torch.rand(20, 20) # keeps constant during training phase
    self.linear = nn.LazyLinear(20)

  def forward(self, X):
    X = self.linear(X)
    X = F.relu(torch.mm(X, self.rand_weight) + 1)
    X = self.linear(X)

    # control flow : 
    while X.abs().sum() > 1:
      X /= 2
    return X.sum()

In [15]:
net = FixedHiddenMLP()
net(X).shape
print(net(X))

tensor(-0.3517, grad_fn=<SumBackward0>)




In [16]:
class NestMLP(nn.Module):

  def __init__(self):
    super().__init__()
    self.net = nn.Sequential(nn.LazyLinear(64), nn.ReLU(),
                             nn.LazyLinear(32), nn.ReLU())
    self.linear = nn.LazyLinear(16)

  def forward(self, X):
    return self.linear(self.net(X))

In [17]:
chimera = nn.Sequential(NestMLP(), nn.LazyLinear(20), FixedHiddenMLP())



In [18]:
chimera(X)

tensor(0.0050, grad_fn=<SumBackward0>)

## Ex 6.1 :

### Q1:

- state_dic : Python dictionary that maps each layer to its parameter tensor
- when we use list instead of add_module the state_dict is empty

In [19]:
class ListNet(nn.Module):

  def __init__(self, *args):
    super().__init__()
    self.net = []
    for module in args:
      self.net.append(module)

  def forward(self, X):
    for module in self.net:
      X = module(X)
    return X

In [21]:
net = ListNet(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(128), nn.ReLU(), nn.LazyLinear(10))
net(X)

tensor([[ 0.0274,  0.0260,  0.1973,  0.0147,  0.0318,  0.0287, -0.0385, -0.1426,
         -0.1011, -0.0644],
        [-0.0413, -0.0193,  0.0711, -0.0793,  0.0913, -0.0307, -0.0787, -0.0820,
         -0.0446, -0.0593]], grad_fn=<AddmmBackward0>)

In [22]:
net.parameters()

<generator object Module.parameters at 0x7f1359a0ec10>

In [24]:
net.state_dict()

OrderedDict()

In [25]:
net2 = nn.Sequential(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(128), nn.ReLU(), nn.LazyLinear(10))
net2(X)



tensor([[ 0.0505, -0.1869,  0.1089, -0.2721,  0.2790,  0.3549,  0.0273, -0.1172,
         -0.0839,  0.1032],
        [ 0.0409, -0.0927,  0.0719, -0.1463,  0.3058,  0.2286,  0.0459, -0.0259,
          0.0939, -0.0117]], grad_fn=<AddmmBackward0>)

In [26]:
net2.parameters()

<generator object Module.parameters at 0x7f1359a0e900>

In [27]:
net2.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.1522,  0.1257, -0.0810,  ...,  0.0219, -0.2201, -0.1467],
                      [ 0.1635,  0.0977, -0.0466,  ..., -0.0032,  0.1955, -0.1167],
                      [-0.2150, -0.2152,  0.0601,  ..., -0.1217, -0.0639, -0.1730],
                      ...,
                      [ 0.1438, -0.2214,  0.1239,  ...,  0.2232, -0.1249,  0.1647],
                      [-0.1176,  0.0283,  0.1825,  ..., -0.1848,  0.0491, -0.2024],
                      [ 0.1709, -0.0777, -0.0927,  ...,  0.1083,  0.1943, -0.1743]])),
             ('0.bias',
              tensor([ 3.7598e-02,  1.0214e-01, -2.0139e-01, -6.8573e-02,  3.2602e-02,
                       4.0698e-02,  1.3531e-01, -1.4177e-01, -2.5217e-02,  1.2709e-01,
                       2.1259e-01, -6.8385e-02,  7.0430e-02, -1.3538e-01, -1.0265e-01,
                       1.5972e-01, -1.1400e-01, -6.8815e-02,  8.2187e-02, -3.2376e-02,
                      -2.9798e-02,  1.8135e-01, -2.0759e-01,  1.7790

### Q2:

In [40]:
class Parallel(nn.Module):

  def __init__(self, net1, net2):
    super().__init__()
    self.net = nn.Sequential(net1, net2)

  def forward(self, X):
    return self.net(X)

In [36]:
net1 = nn.Sequential(nn.LazyLinear(20), nn.ReLU(), nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))
net2 = nn.Sequential(nn.LazyLinear(40), nn.ReLU(), nn.LazyLinear(80), nn.ReLU(), nn.LazyLinear(1))

In [41]:
parallel_net = Parallel(net1=net1,net2=net2)
parallel_net(X)

tensor([[0.0616],
        [0.0693]], grad_fn=<AddmmBackward0>)

### Q3:

In [46]:
class Network(nn.Module):

  def __init__(self, net, k):
    super().__init__()
    self.k = k # number of times to concatenate the network
    for i in range(k):
      self.add_module(str(i), net)

  def forward(self, X):
    for module in self.children():
      X = module(X)
    return X

In [47]:
inst = nn.Sequential(nn.LazyLinear(128), nn.ReLU(), nn.LazyLinear(64), nn.ReLU(), nn.LazyLinear(512), nn.ReLU(), nn.LazyLinear(1))
net = Network(inst, 100)
net(X)



tensor([[0.0553],
        [0.0169]], grad_fn=<AddmmBackward0>)

In [48]:
net

Network(
  (0): Sequential(
    (0): Linear(in_features=20, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=1, bias=True)
  )
  (1): Sequential(
    (0): Linear(in_features=20, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=1, bias=True)
  )
  (2): Sequential(
    (0): Linear(in_features=20, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=1, bias=True)
  )
  (3): Sequential(
    (0): Linear(in_features=20