# Create a neural network

This tutorial refers to https://gluon-crash-course.mxnet.io/nn.html

"nd" means the NDArray package and<br>
"nn" means the neural network package

In [2]:
from mxnet import nd
from mxnet.gluon import nn

## 1. Create a first layer of neural network

Make a dense layer with 2 output units

In [3]:
# Dense layer with 2 output units; the input size is not given here.
layer = nn.Dense(2)
layer

Dense(None -> 2, linear)

Then initialize its weights with the default initialization method, which draws random values uniformly from [−0.07, 0.07]<br>
TODO: So what size does it have internally?

In [4]:
# Initialize with the default initializer. The repr below still shows
# `None` as the input size because no data has been fed in yet.
layer.initialize()
layer

Dense(None -> 2, linear)

Create a random input `x` of shape (3,4) whose values are drawn uniformly from [-1,1].

In [5]:
# Random (3, 4) input; bounds and shape are passed by keyword for readability.
x = nd.random.uniform(low=-1, high=1, shape=(3, 4))
x


[[ 0.09762704  0.18568921  0.43037868  0.6885315 ]
 [ 0.20552671  0.71589124  0.08976638  0.6945034 ]
 [-0.15269041  0.24712741  0.29178822 -0.23123658]]
<NDArray 3x4 @cpu(0)>

In [8]:
# Forward pass: feed the (3, 4) input through the layer.
layer(x)


[[-0.02524133 -0.00874885]
 [-0.06026538 -0.01308061]
 [ 0.02468396 -0.02181557]]
<NDArray 3x2 @cpu(0)>

The layer's 2 output units produced a (3,2) shape output from our (3,4) input.<br>
We did not specify the input size; the system infers it the first time we feed in data, and then creates and initializes the weights.<br>
So the layer's weight has shape (2,4), i.e. (output units, input units), and the forward pass multiplies the input by its transpose:

(3,4) * (4,2) = (3,2)

we can access the weight after the first forward pass<br>

In [9]:
# Weight values of the dense layer, available now that a forward pass has run.
layer.weight.data()


[[-0.00873779 -0.02834515  0.05484822 -0.06206018]
 [ 0.06491279 -0.03182812 -0.01631819 -0.00312688]]
<NDArray 2x4 @cpu(0)>

## 2. Chain layers into a neural network

The simplest kind of neural network is a chain of layers.<br>
During the forward pass, it runs layers sequentially one-by-one.<br>
The following code implements a famous network called LeNet through nn.Sequential

In [7]:
net = nn.Sequential()

# Convolution + pooling stages. As with Dense, `in_channels` is left out and
# is inferred automatically in the first forward pass; each convolution
# applies a relu activation to its output. A tuple such as
# `kernel_size=(2,4)` selects a non-square kernel, and tuples can likewise
# give non-symmetric pool and stride sizes.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Conv2D(channels=16, kernel_size=3, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))

# Flatten the 4-D tensor into 2-D with shape
# `(x.shape[0], x.size/x.shape[0])` so the dense layers below can consume it.
net.add(nn.Flatten())

# Dense layers: two relu-activated layers followed by a linear 10-unit output.
net.add(nn.Dense(120, activation="relu"))
net.add(nn.Dense(84, activation="relu"))
net.add(nn.Dense(10))

In [8]:
# Display the network's layer structure.
net

Sequential(
  (0): Conv2D(None -> 6, kernel_size=(5, 5), stride=(1, 1))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (2): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (4): Flatten
  (5): Dense(None -> 120, Activation(relu))
  (6): Dense(None -> 84, Activation(relu))
  (7): Dense(None -> 10, linear)
)

The usage of nn.Sequential is similar to nn.Dense.<br>
In fact, both of them are subclasses of nn.Block. <br>
The following code shows how to initialize the weights and run the forward pass.

In [9]:
# Initialize the parameters, then run one forward pass on a random batch.
net.initialize()
x = nd.random.uniform(shape=(4,1,28,28)) # Input shape is (batch_size, color_channels, height, width)
y = net(x)
# Shape of the network output for this batch.
y.shape

The following accesses the 1st layer’s weight and the 6th layer’s bias.

In [19]:
# net[0] is the first Conv2D layer and net[5] is the first Dense layer (120 units).
(net[0].weight.data().shape, net[5].bias.data().shape)

((6, 1, 5, 5), (120,))

## 3. Create a neural network flexibly

another way to construct a network with a flexible forward function.

To do it, first create a subclass of nn.Block and implement two methods:

* `__init__` creates the layers.
* `forward` defines the forward function.

In [12]:
class MixMLP(nn.Block):
    """Block that combines an nn.Sequential sub-network with a custom forward pass.

    Children:
        blk:   Sequential of two relu-activated Dense layers (3 units, then 4).
        dense: final Dense layer with 5 units and no activation.
    """

    def __init__(self, **kwargs):
        """Create the child layers; `kwargs` are forwarded to nn.Block."""
        super(MixMLP, self).__init__(**kwargs)
        # The "in -> out" notes assume a 2-feature input, as used in the
        # cells that follow; the input sizes are not fixed here.
        self.blk = nn.Sequential()
        self.blk.add(nn.Dense(3, activation='relu'))  # 2 -> 3
        self.blk.add(nn.Dense(4, activation='relu'))  # 3 -> 4
        self.dense = nn.Dense(5)                      # 4 -> 5

    def forward(self, x):
        """Pass `x` through `blk`, apply relu, print the result, and return `dense` of it."""
        hidden = self.blk(x)
        # blk's last layer already applies relu, so this second relu leaves
        # the values unchanged.
        hidden = nd.relu(hidden)
        # Printed so the intermediate activation is visible on every call.
        print(hidden)
        return self.dense(hidden)

In [13]:
# Instantiate the custom block (this rebinds `net`) and display its structure.
net = MixMLP()
net

MixMLP(
  (blk): Sequential(
    (0): Dense(None -> 3, Activation(relu))
    (1): Dense(None -> 4, Activation(relu))
  )
  (dense): Dense(None -> 5, linear)
)

In [16]:
# Initialize the parameters and create a random (2, 2) input
# using nd.random.uniform's default range.
net.initialize()
x = nd.random.uniform(shape=(2,2))
x


[[6.2898183e-01 5.5350363e-05]
 [8.7265068e-01 3.1186023e-01]]
<NDArray 2x2 @cpu(0)>

In [17]:
# Forward pass. MixMLP.forward prints the intermediate activation first,
# so two arrays appear below: the printed one, then the returned output.
net(x)


[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<NDArray 2x4 @cpu(0)>



[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<NDArray 2x5 @cpu(0)>

access a particular layer’s weight

In [18]:
# Weight of the second Dense layer inside the `blk` Sequential.
net.blk[1].weight.data()


[[ 0.06339083 -0.00614183  0.02624836]
 [-0.00232279 -0.03982893  0.04042352]
 [ 0.06263188 -0.03787814  0.03231981]
 [ 0.05324166 -0.03444817 -0.02608307]]
<NDArray 4x3 @cpu(0)>