<a href="https://colab.research.google.com/github/VirtueZhao/Dive_into_Deep_Learning/blob/main/5_1_Layers_and_Blocks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

# Baseline model built with the stock container: 20 -> 256 (ReLU) -> 10.
net = nn.Sequential(
    nn.Linear(in_features=20, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=10),
)
# Dummy minibatch of 2 examples with 20 uniform-random features each;
# reused as the input for every model defined later in the notebook.
X = torch.rand((2, 20))
net(X)

tensor([[ 0.1656, -0.0791, -0.1423, -0.1286, -0.2268,  0.0812,  0.1525, -0.0625,
         -0.0389,  0.2103],
        [ 0.2113, -0.1490, -0.2432, -0.1408, -0.2733,  0.1499,  0.1506, -0.0350,
         -0.0520,  0.2659]], grad_fn=<AddmmBackward0>)

In [None]:
class MLP(nn.Module):
  """Two-layer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

  def __init__(self):
    """Create the hidden and output affine layers."""
    super().__init__()
    self.hidden = nn.Linear(in_features=20, out_features=256)
    self.out = nn.Linear(in_features=256, out_features=10)

  def forward(self, X):
    """Return the output-layer values for the input batch ``X``.

    ``X`` must have 20 features in its last dimension (required by
    ``self.hidden``); the result has 10 values per example.
    """
    hidden_activations = F.relu(self.hidden(X))
    return self.out(hidden_activations)

In [None]:
# Rebind `net` to a freshly initialised custom MLP and run it on the shared
# input X; the cell displays the returned tensor.
net = MLP()
net(X)

tensor([[ 0.0107,  0.0127, -0.1246,  0.0762, -0.0168,  0.2129, -0.2542, -0.0209,
         -0.0710, -0.4358],
        [ 0.1027,  0.1595, -0.1034, -0.0147, -0.0995,  0.1580, -0.2705, -0.0193,
         -0.1729, -0.4051]], grad_fn=<AddmmBackward0>)

In [None]:
class MySequential(nn.Module):
  """Minimal re-implementation of ``nn.Sequential``.

  The given blocks are stored in ``self._modules`` under their positional
  index (``"0"``, ``"1"``, ...) and applied in that order by ``forward``.
  Both methods print a trace of the blocks they touch, for demonstration.

  Args:
    *args: the blocks to chain, in execution order.
  """

  def __init__(self, *args):
    super().__init__()
    for idx, block in enumerate(args):
      print("block: ", block)
      # `_modules` keys must be strings: `repr`, `named_parameters` and
      # `state_dict` all concatenate the key into a name and raise TypeError
      # otherwise. Indexing by position also keeps a block that is passed
      # more than once as separate entries instead of collapsing it.
      self._modules[str(idx)] = block

  def forward(self, X):
    """Feed ``X`` through every stored block in order and return the result.

    Prints each block and the shape of its input before applying it.
    """
    for block in self._modules.values():
      print("block: ", block)
      print("X.shape: ", X.shape)
      X = block(X)
    return X

In [None]:
# Build the 20 -> 256 -> 10 MLP with the hand-written container; the
# constructor prints each block as it is stored.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

block:  Linear(in_features=20, out_features=256, bias=True)
block:  ReLU()
block:  Linear(in_features=256, out_features=10, bias=True)


In [None]:
# Forward pass through MySequential: prints each block and the shape of its
# input before applying it, then displays the final output tensor.
net(X)

block:  Linear(in_features=20, out_features=256, bias=True)
X.shape:  torch.Size([2, 20])
block:  ReLU()
X.shape:  torch.Size([2, 256])
block:  Linear(in_features=256, out_features=10, bias=True)
X.shape:  torch.Size([2, 256])


tensor([[-6.6642e-02, -1.2212e-02, -2.8864e-01,  1.1474e-01,  6.2709e-02,
         -6.5087e-02, -1.5789e-01,  1.2593e-01,  6.4001e-02, -2.1273e-04],
        [ 1.1726e-01, -4.8543e-02, -3.1946e-01,  3.3828e-02,  1.6683e-01,
          2.1858e-02, -2.2797e-01,  5.9341e-02,  8.4259e-03,  8.0541e-02]],
       grad_fn=<AddmmBackward0>)

In [29]:
class FixedHiddenMLP(nn.Module):
  """MLP with a constant random weight matrix, a reused layer and Python control flow.

  ``forward`` applies the same ``nn.Linear(20, 20)`` layer twice, with a fixed
  (non-trainable) random 20x20 matrix multiplication and a ReLU in between,
  then rescales the result and reduces it to a scalar.
  """

  def __init__(self):
    super().__init__()
    # Registered as a buffer so the constant follows the module through
    # `.to(device)`, `.cuda()` and `.double()`; a plain tensor attribute is
    # skipped by those calls and then fails in `forward` on a device or dtype
    # mismatch. `persistent=False` keeps it out of `state_dict`, so the
    # checkpoint keys are unchanged. Buffers never receive gradients.
    self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
    self.linear = nn.Linear(20, 20)

  def forward(self, X):
    """Return a scalar tensor computed from the input batch ``X``.

    ``X`` must have 20 features in its last dimension (required by
    ``self.linear``) and be 2-D (required by ``torch.mm``).
    """
    X = self.linear(X)
    # Constant hidden layer: rand_weight is not a parameter, so it is never
    # updated by an optimizer.
    X = F.relu(torch.mm(X, self.rand_weight) + 1)
    # Reuse the same layer: both applications share one set of parameters.
    X = self.linear(X)

    # Halve the activations until their L1 norm is at most 1.
    while X.abs().sum() > 1:
      X /= 2
    return X.sum()

In [None]:
# Instantiate the fixed-hidden-weight model and run it on X; the result is a
# scalar tensor.
# NOTE(review): the saved output of this cell also shows a truncated
# "Random Constant Weight" dump that the class definition does not print;
# it looks stale. Re-run the cell to refresh it.
net = FixedHiddenMLP()
net(X)

Random Constant Weight:  tensor([[0.7780, 0.6669, 0.0199, 0.3668, 0.0355, 0.4385, 0.8643, 0.5396, 0.2862,
         0.3566, 0.3343, 0.8176, 0.6625, 0.2531, 0.5812, 0.1201, 0.8012, 0.3972,
         0.3455, 0.5672],
        [0.0453, 0.8185, 0.3087, 0.4420, 0.3341, 0.8902, 0.0781, 0.4348, 0.6033,
         0.8551, 0.0284, 0.7537, 0.0423, 0.3991, 0.4727, 0.7780, 0.8208, 0.3093,
         0.2980, 0.6577],
        [0.5202, 0.9986, 0.2730, 0.0446, 0.2660, 0.9463, 0.2272, 0.5762, 0.5771,
         0.1202, 0.7382, 0.6326, 0.2167, 0.2782, 0.7541, 0.2141, 0.6906, 0.8236,
         0.7871, 0.1336],
        [0.9095, 0.9238, 0.3386, 0.9166, 0.0755, 0.7112, 0.0574, 0.2409, 0.4076,
         0.2596, 0.2366, 0.1707, 0.2993, 0.8163, 0.5489, 0.6811, 0.5454, 0.2092,
         0.9712, 0.6705],
        [0.5310, 0.9534, 0.9895, 0.0912, 0.3847, 0.9271, 0.1175, 0.2558, 0.1656,
         0.4235, 0.5459, 0.7059, 0.4238, 0.9901, 0.3741, 0.8011, 0.4908, 0.3588,
         0.7747, 0.4349],
        [0.4497, 0.3724, 0.0294, 0.

tensor(0.2990, grad_fn=<SumBackward0>)

In [30]:
class NestMLP(nn.Module):
  """Block that nests an ``nn.Sequential`` feature stack inside a custom module.

  Maps 20 input features through 64 and 32 hidden units (each followed by a
  ReLU) and then through a final linear layer to 16 outputs.
  """

  def __init__(self):
    """Build the nested feature stack and the output projection."""
    super().__init__()
    feature_layers = [
        nn.Linear(20, 64), nn.ReLU(),
        nn.Linear(64, 32), nn.ReLU(),
    ]
    self.net = nn.Sequential(*feature_layers)
    self.linear = nn.Linear(32, 16)

  def forward(self, X):
    """Return the 16-dimensional projection of the nested stack's output."""
    features = self.net(X)
    return self.linear(features)

# Mix custom blocks and built-in layers in one container:
# NestMLP (20 -> 16), a plain Linear (16 -> 20), then FixedHiddenMLP, which
# reduces its 20-feature input to a single scalar.
chimera = nn.Sequential(NestMLP(), nn.Linear(16,20), FixedHiddenMLP())
chimera(X)

tensor(-0.1993, grad_fn=<SumBackward0>)