<a href="https://colab.research.google.com/github/anubhavgupta1/Dive-Into-Deep-Learning/blob/main/Deep%20Learning%20Computation/Layers%20and%20Blocks/mxnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  Deep Learning Computation

In [None]:
!pip install d2l==0.16.1
!pip install -U mxnet-cu101==1.7.0

In [2]:
from mxnet import np, npx
from mxnet.gluon import nn
# Enable MXNet's NumPy-compatible array and shape semantics before any
# arrays or layers are created
npx.set_np()

In [3]:
# Dummy input of shape (2, 20) drawn from a uniform distribution; it is fed
# to every network below. No random seed is set, so the values (and all
# printed outputs) change from run to run.
X = np.random.uniform(size=(2, 20))

### A Baseline Network with `nn.Sequential`

In [4]:
# Baseline model built from the stock container: a 256-unit ReLU hidden
# layer followed by a 10-unit output layer with no activation
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dense(10))
# Set up the parameters using the default initializer
net.initialize()

In [5]:
# Show the layer structure. Input sizes are printed as -1 here, presumably
# because no data has been passed through the layers yet (TODO confirm).
print(net)

Sequential(
  (0): Dense(-1 -> 256, Activation(relu))
  (1): Dense(-1 -> 10, linear)
)


In [6]:
# Forward pass on `X`: one output row per input row and one column per unit
# of the last layer, i.e. a 2 x 10 result
print(net(X))

[[ 0.06240274 -0.03268593  0.02582653  0.02254181 -0.03728798 -0.04253785
   0.00540612 -0.01364185 -0.09915454 -0.02272737]
 [ 0.02816679 -0.03341204  0.03565665  0.02506384 -0.04136416 -0.04941844
   0.01738529  0.01081963 -0.09932579 -0.01176296]]


### MLP Block

In [7]:
class MLP(nn.Block):
    """Multilayer perceptron written as a custom `Block`.

    The model owns two fully-connected layers: a 256-unit hidden layer with
    ReLU activation and a 10-unit output layer without activation.
    """

    def __init__(self, **kwargs):
        """Create the two layers that hold the model parameters.

        Args:
            **kwargs: Forwarded unchanged to the `Block` constructor, so the
                usual `Block` options can still be given at instantiation.
        """
        # The parent constructor must run first so that the layers assigned
        # below are registered as children of this block
        super().__init__(**kwargs)
        # Hidden layer
        self.hidden = nn.Dense(256, activation='relu')
        # Output layer
        self.out = nn.Dense(10)

    def forward(self, X):
        """Compute the model output for the input `X`.

        Args:
            X: Input array passed to the hidden layer.

        Returns:
            The output layer applied to the hidden layer's activations.
        """
        hidden_activations = self.hidden(X)
        return self.out(hidden_activations)

In [8]:
# Instantiate the custom block; its layers are listed under the attribute
# names they were assigned to (`hidden`, `out`)
net = MLP()
print(net)

MLP(
  (hidden): Dense(-1 -> 256, Activation(relu))
  (out): Dense(-1 -> 10, linear)
)


In [9]:
# Initialize the parameters, then run a forward pass on `X`
net.initialize()
print(net(X))

[[-0.03989595 -0.10414709  0.06799038  0.05245074  0.0252606  -0.00640342
   0.04182098 -0.01665318 -0.02067345 -0.07863816]
 [-0.03612847 -0.07210435  0.09159479  0.07890773  0.02494171 -0.01028665
   0.01732427 -0.02843244  0.03772651 -0.06671703]]


### The Sequential Block

In [10]:
class MySequential(nn.Block):
    """Minimal re-implementation of a sequential container.

    Child blocks are stored in `_children`, the ordered dictionary that the
    `Block` base class provides, and `forward` applies them one after
    another in the order in which they were added.
    """

    def add(self, block):
        """Append `block` to the end of the chain.

        Args:
            block: An instance of a `Block` subclass.
        """
        # Store the block in the member variable `_children` of the `Block`
        # class, whose type is OrderedDict. When the `MySequential` instance
        # calls the `initialize` function, the system automatically
        # initializes all members of `_children`.
        #
        # The key is the insertion position rather than `block.name`. With
        # name-based keys, adding the same block twice (e.g. to reuse its
        # parameters) or adding two blocks that share a name would silently
        # overwrite the earlier entry and drop a layer from the chain.
        position = str(len(self._children))
        self._children[position] = block

    def forward(self, X):
        """Feed `X` through every stored block in insertion order.

        Args:
            X: Input passed to the first block.

        Returns:
            The output of the last block, or `X` itself if no block has
            been added.
        """
        # OrderedDict guarantees that members will be traversed in the order
        # they were added
        for block in self._children.values():
            X = block(X)
        return X

In [11]:
# Start from an empty container; no child blocks have been added yet
net = MySequential()
print(net)

MySequential(

)


In [12]:
# Same two-layer architecture as before, added one block per call because
# `MySequential.add` takes a single block
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()

In [13]:
# Inspect the internal ordered dict to confirm that both layers were stored
# and that they appear in insertion order
print(net._children.values())

odict_values([Dense(-1 -> 256, Activation(relu)), Dense(-1 -> 10, linear)])


In [14]:
# Forward pass through the hand-written container
print(net(X))

[[-0.0764568  -0.01130233  0.04952145 -0.04651389 -0.04131571 -0.05884131
  -0.06213811  0.01311471 -0.01379425 -0.02514282]
 [-0.05124623  0.00711232 -0.00155933 -0.07555379 -0.06675334 -0.01762914
   0.00589085  0.0144719  -0.04330775  0.03317727]]


### Custom Forward Logic, Nested Blocks, and Composition

In [15]:
class FixedHiddenMLP(nn.Block):
    """Block whose forward pass mixes layers with arbitrary Python code.

    It combines a trainable fully-connected layer (used twice, so its
    parameters are shared), a constant random weight matrix that is not
    updated during training, and data-dependent control flow.
    """

    def __init__(self, **kwargs):
        """Create the constant weight matrix and the shared dense layer.

        Args:
            **kwargs: Forwarded unchanged to the `Block` constructor.
        """
        super().__init__(**kwargs)
        # Random weight parameters created with the `get_constant` function
        # are not updated during training (i.e., constant parameters)
        self.rand_weight = self.params.get_constant(
            'rand_weight', np.random.uniform(size=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, X):
        """Run the forward computation and reduce the result to a scalar.

        Args:
            X: Input array passed to the dense layer.

        Returns:
            The sum of all entries of the final activations after they have
            been halved repeatedly until their absolute sum is at most 1.
        """
        X = self.dense(X)
        # Use the created constant parameters, as well as the `relu` and `dot`
        # functions
        X = npx.relu(np.dot(X, self.rand_weight.data()) + 1)
        # Reuse the fully-connected layer. This is equivalent to sharing
        # parameters with two fully-connected layers
        X = self.dense(X)
        # Control flow
        while np.abs(X).sum() > 1:
            # Rebind `X` to a new array instead of using `X /= 2`: the values
            # are identical, but in-place operators are not allowed on
            # arrays that are part of a recorded autograd graph, so the
            # in-place form would fail when this block is used in training
            X = X / 2
        return X.sum()

In [16]:
# Only `dense` is listed as a child layer; `rand_weight` is a constant
# parameter of the block rather than a layer
net = FixedHiddenMLP()
print(net)

FixedHiddenMLP(
  (dense): Dense(-1 -> 20, Activation(relu))
)


In [17]:
# Initialize, then run the forward pass; `forward` ends with `X.sum()`, so
# the result is a single scalar rather than a matrix
net.initialize()
print(net(X))

0.52637565


In [18]:
class NestMLP(nn.Block):
    """Block that nests a `Sequential` container next to a plain layer.

    The inner container stacks a 64-unit and a 32-unit ReLU layer; its
    output is passed to a separate 16-unit ReLU layer.
    """

    def __init__(self, **kwargs):
        """Build the nested container and the trailing dense layer.

        Args:
            **kwargs: Forwarded unchanged to the `Block` constructor.
        """
        super().__init__(**kwargs)
        # Inner stack, filled one layer at a time
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation='relu'))
        self.net.add(nn.Dense(32, activation='relu'))
        # Layer applied to the output of the inner stack
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, X):
        """Apply the inner stack and then the trailing dense layer.

        Args:
            X: Input array passed to the inner container.

        Returns:
            The output of `self.dense` applied to the inner stack's output.
        """
        stacked_features = self.net(X)
        return self.dense(stacked_features)

In [19]:
# The printed structure shows the nested container and its own layers
# indented under the `net` attribute
net = NestMLP()
print(net)

NestMLP(
  (net): Sequential(
    (0): Dense(-1 -> 64, Activation(relu))
    (1): Dense(-1 -> 32, Activation(relu))
  )
  (dense): Dense(-1 -> 16, Activation(relu))
)


In [20]:
# Initialize, then run the forward pass; the last layer has 16 units, so
# each of the 2 input rows yields 16 values
net.initialize()
print(net(X))

[[ 2.9371739e-03  5.7932874e-04 -0.0000000e+00 -0.0000000e+00
  -0.0000000e+00 -0.0000000e+00 -0.0000000e+00 -0.0000000e+00
   5.9629581e-03  3.4165871e-03 -0.0000000e+00  1.2378028e-03
   2.7629118e-03  2.5672135e-03 -0.0000000e+00 -0.0000000e+00]
 [ 2.4156112e-03  3.6840735e-05 -0.0000000e+00 -0.0000000e+00
  -0.0000000e+00  6.4551947e-04 -0.0000000e+00 -0.0000000e+00
   2.2474134e-03  5.1313322e-03 -0.0000000e+00  1.4229940e-03
   2.8961021e-03  4.5566252e-03 -0.0000000e+00 -0.0000000e+00]]


In [21]:
# Custom blocks and built-in layers can be mixed freely inside a container.
# The stages are added one per call, in the order in which they are applied.
chimera = nn.Sequential()
chimera.add(NestMLP())
chimera.add(nn.Dense(20))
chimera.add(FixedHiddenMLP())
print(chimera)

Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Dense(-1 -> 64, Activation(relu))
      (1): Dense(-1 -> 32, Activation(relu))
    )
    (dense): Dense(-1 -> 16, Activation(relu))
  )
  (1): Dense(-1 -> 20, linear)
  (2): FixedHiddenMLP(
    (dense): Dense(-1 -> 20, Activation(relu))
  )
)


In [22]:
# Initialize all nested parameters and run the full pipeline. The final
# stage is `FixedHiddenMLP`, whose forward pass returns a sum, so the
# displayed result is a scalar.
chimera.initialize()
chimera(X)

array(0.55778044)