<a href="https://colab.research.google.com/github/anubhavgupta1/Dive-Into-Deep-Learning/blob/main/Deep%20Learning%20Computation/Layers%20and%20Blocks/pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  Deep Learning Computation

In [None]:
!pip install d2l==0.16.1


In [2]:
import torch
from torch import nn
from torch.nn import functional as F

In [3]:
# Shared demo input: 2 rows of 20 features, sampled uniformly from [0, 1).
# No seed is set, so the values (and every printed output below) change
# from run to run.
X = torch.rand(2, 20)

### A Custom Block

In [4]:
# Baseline model built from the stock container: 20 -> 256 -> ReLU -> 10
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

In [5]:
# Show the container's structure; children are listed under their index
print(net)

Sequential(
  (0): Linear(in_features=20, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [6]:
# Calling the module runs its forward pass: one row of 10 outputs per input row
print(net(X))

tensor([[-0.1362, -0.1454, -0.2778,  0.0576,  0.0218,  0.0943,  0.0904, -0.2205,
          0.0724,  0.1856],
        [ 0.1261, -0.1266, -0.1874,  0.0770,  0.0214,  0.1256,  0.2203, -0.0679,
          0.1863,  0.0685]], grad_fn=<AddmmBackward>)


### MLP Block

In [7]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer.

    Maps 20 input features to 10 outputs through a 256-unit fully connected
    hidden layer followed by a ReLU activation.
    """

    def __init__(self):
        # `nn.Module.__init__` must run first so that the layers assigned
        # below are registered as submodules (and their parameters tracked)
        super().__init__()
        # Fully connected hidden layer: 20 -> 256
        self.hidden = nn.Linear(20, 256)
        # Fully connected output layer: 256 -> 10
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Compute the model output for the input batch `X`."""
        # ReLU is applied through its functional form from `nn.functional`,
        # so it does not appear as a submodule of this block
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

In [8]:
# Instantiate the custom block; printing lists the layers registered in
# `__init__` under their attribute names (`hidden`, `out`)
net = MLP()
print(net)

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)


In [9]:
# Forward pass through the custom MLP: one row of 10 outputs per input row
print(net(X))

tensor([[-0.3355, -0.1387, -0.0228, -0.2143, -0.0566, -0.2244, -0.1876, -0.0995,
          0.2165,  0.2100],
        [-0.3842, -0.1846,  0.0411, -0.3238, -0.1191, -0.1736, -0.0848, -0.1702,
          0.3924,  0.2264]], grad_fn=<AddmmBackward>)


### The Sequential Block

In [10]:
class MySequential(nn.Module):
    """Minimal re-implementation of `nn.Sequential`.

    Args:
        *args: The blocks (`nn.Module` instances) to chain. They are applied
            in the order given; the output of one is the input of the next.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register each block under its position, as a string. Module
            # names must be strings: `repr`, `state_dict` and
            # `named_parameters` all build dotted names from them. Keying by
            # position (rather than by the block object) also keeps a block
            # that is passed more than once as separate entries, so it is
            # applied once per occurrence.
            self.add_module(str(idx), block)

    def forward(self, X):
        """Feed `X` through every registered block in insertion order."""
        # `_modules` is an ordered mapping, so blocks are traversed in the
        # order they were added
        for block in self._modules.values():
            X = block(X)
        return X

In [11]:
# With no blocks passed, nothing is registered and the repr shows no children
net = MySequential()
print(net)

MySequential()


In [12]:
# Same 20 -> 256 -> ReLU -> 10 architecture as before, now chained by the
# custom container
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

In [13]:
# Forward pass through the custom container: one row of 10 outputs per input row
print(net(X))

tensor([[ 0.0815, -0.0451,  0.0673,  0.0729,  0.0160,  0.1364,  0.0418,  0.1265,
          0.0098,  0.1743],
        [ 0.1499, -0.1856, -0.1090,  0.1220,  0.1083,  0.1272,  0.1133,  0.1505,
         -0.0024,  0.1488]], grad_fn=<AddmmBackward>)


### Ensemble Block

In [14]:
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with constant weights and control flow.

    The same 20 -> 20 linear layer is applied twice, with a fixed random
    20 x 20 matrix in between. `forward` returns a scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that never receive gradients. They are
        # registered as a buffer so that `.to()`, `.cuda()`, `.double()` etc.
        # move/convert them together with the trainable parameters; a plain
        # tensor attribute would be left behind and break `forward`.
        # `persistent=False` keeps them out of `state_dict`, so the
        # checkpoint keys stay `linear.weight` / `linear.bias` only.
        self.register_buffer('rand_weight', torch.rand((20, 20)),
                             persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar computed from the input batch `X` (20 features)."""
        X = self.linear(X)
        # Use the constant weights, as well as the `relu` and `mm` functions
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully-connected layer. This is equivalent to sharing
        # parameters between two fully-connected layers
        X = self.linear(X)
        # Control flow: halve the activations until their L1 norm is at most 1
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

In [15]:
# `rand_weight` is not a submodule, so the printed structure lists only `linear`
net = FixedHiddenMLP()
print(net)

FixedHiddenMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)


In [16]:
# `forward` ends with `X.sum()`, so the result is a single scalar tensor
print(net(X))

tensor(0.3358, grad_fn=<SumBackward0>)


In [17]:
class NestMLP(nn.Module):
    """Block that nests an `nn.Sequential` stack inside a custom module.

    A 20 -> 64 -> 32 stack (ReLU after each layer) is followed by a final
    32 -> 16 linear layer.
    """

    def __init__(self):
        super().__init__()
        # Inner stack, stored as one nested submodule
        stacked_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*stacked_layers)
        # Output projection applied on top of the nested stack
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run `X` through the nested stack, then the final linear layer."""
        nested_out = self.net(X)
        return self.linear(nested_out)

In [18]:
# The repr shows the nested `Sequential` (`net`) indented under the outer block
net = NestMLP()
print(net)

NestMLP(
  (net): Sequential(
    (0): Linear(in_features=20, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
  )
  (linear): Linear(in_features=32, out_features=16, bias=True)
)


In [19]:
# Forward pass through the nested block: one row of 16 outputs per input row
print(net(X))

tensor([[-0.0430, -0.1136,  0.0344,  0.0052,  0.0080,  0.0799, -0.1661,  0.0553,
          0.1131, -0.1121, -0.0420, -0.1781,  0.0803, -0.0538,  0.0892,  0.0685],
        [-0.0614, -0.0389,  0.0563, -0.0055, -0.0073,  0.1124, -0.1901,  0.0569,
          0.1194, -0.0985, -0.0510, -0.1804,  0.0882, -0.0835,  0.0423,  0.1408]],
       grad_fn=<AddmmBackward>)


In [20]:
# Compose custom and built-in blocks freely: NestMLP maps 20 -> 16, the
# linear layer maps 16 -> 20, and FixedHiddenMLP reduces that to a scalar
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
print(chimera)

Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=20, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=32, bias=True)
      (3): ReLU()
    )
    (linear): Linear(in_features=32, out_features=16, bias=True)
  )
  (1): Linear(in_features=16, out_features=20, bias=True)
  (2): FixedHiddenMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)


In [21]:
# The last block (FixedHiddenMLP) returns a sum, so the whole chain yields a scalar
print(chimera(X))

tensor(0.0617, grad_fn=<SumBackward0>)
