There is no need to specify the input size of a layer. MXNet infers it the first time the layer is used.

In [1]:
from mxnet import nd
from mxnet.gluon import nn

First layer

In [2]:
# A fully connected layer with 2 output units. The input size is not given,
# so the repr shows it as `None` until it can be inferred from data.
layer = nn.Dense(2)
layer

Dense(None -> 2, linear)

In [3]:
# Default initializer: weights drawn from Uniform(-0.07, 0.07), biases set to 0.
# The weights printed further down all fall inside that range.
layer.initialize()

In [4]:
# Sample a 3x4 input batch with values between -1 and 1.
x = nd.random.uniform(low=-1, high=1, shape=(3, 4))
print("x: ", x)
# Forward pass: 3 samples with 4 features each -> 3 samples with 2 outputs each.
layer(x)

x:  
[[ 0.09762704  0.18568921  0.43037868  0.6885315 ]
 [ 0.20552671  0.71589124  0.08976638  0.6945034 ]
 [-0.15269041  0.24712741  0.29178822 -0.23123658]]
<NDArray 3x4 @cpu(0)>



[[-0.02524132 -0.00874885]
 [-0.06026538 -0.01308061]
 [ 0.02468396 -0.02181557]]
<NDArray 3x2 @cpu(0)>

In [5]:
# Inspect the weight matrix: one row per output unit and one column per input
# feature (2x4 here, since x had 4 columns).
layer.weight.data()


[[-0.00873779 -0.02834515  0.05484822 -0.06206018]
 [ 0.06491279 -0.03182812 -0.01631819 -0.00312688]]
<NDArray 2x4 @cpu(0)>

## LeNet implementation

In [6]:
net = nn.Sequential()

# Convolutional stage 1: 6 filters of size 5x5, then 2x2 max pooling.
# `kernel_size` may also be given as a tuple to specify a non-square kernel.
net.add(
    nn.Conv2D(channels=6, kernel_size=5, activation="relu"),
    nn.MaxPool2D(pool_size=2, strides=2),
)

# Convolutional stage 2: 16 filters of size 3x3, then 2x2 max pooling.
net.add(
    nn.Conv2D(channels=16, kernel_size=3, activation="relu"),
    nn.MaxPool2D(pool_size=2, strides=2),
)

# Fully connected stage. The first Dense layer automatically reshapes the
# 4-D output of the last max-pooling layer into the 2-D shape
# (x.shape[0], x.size/x.shape[0]). The final layer emits 10 values per sample.
net.add(
    nn.Dense(120, activation="relu"),
    nn.Dense(84, activation="relu"),
    nn.Dense(10),
)
net


Sequential(
  (0): Conv2D(None -> 6, kernel_size=(5, 5), stride=(1, 1), Activation(relu))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (2): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1), Activation(relu))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (4): Dense(None -> 120, Activation(relu))
  (5): Dense(None -> 84, Activation(relu))
  (6): Dense(None -> 10, linear)
)

In [7]:
# Set up the parameters with the default initializer.
net.initialize()
# A batch of 4 single-channel 28x28 inputs, laid out as
# (batch_size, color_channels, height, width).
x = nd.random.uniform(low=0, high=1, shape=(4, 1, 28, 28))
y = net(x)
# One row of 10 outputs per input sample.
y.shape

(4, 10)

In [8]:
# Shapes of the first conv layer's weight and of the bias of the second
# dense layer (index 5 in the Sequential).
(net[0].weight.data().shape, net[5].bias.data().shape)

((6, 1, 5, 5), (84,))

## Flexible NN
This is similar to how neural networks are defined in PyTorch.

In [9]:
class MixMLP(nn.Block):
    """Custom block mixing a `Sequential` sub-network with a standalone layer.

    The input goes through two ReLU-activated dense layers (3 and 4 units),
    an extra ReLU, and finally a 5-unit dense layer without activation.
    """

    def __init__(self, **kwargs):
        """Create the child layers; `kwargs` are forwarded to `nn.Block`."""
        super().__init__(**kwargs)
        # Build the hidden stack first, then attach it as a child block.
        hidden = nn.Sequential()
        hidden.add(nn.Dense(3, activation='relu'))
        hidden.add(nn.Dense(4, activation='relu'))
        self.blk = hidden
        self.dense = nn.Dense(5)

    def forward(self, x):
        """Run `x` through the hidden stack and the output layer.

        The intermediate activation is printed on every call before the
        output layer is applied.
        """
        hidden_out = self.blk(x)
        y = nd.relu(hidden_out)
        print(y)
        return self.dense(y)

# Rebind `net` (previously the LeNet Sequential) to a MixMLP instance.
# Input sizes are still unknown at this point, hence `None` in the repr.
net = MixMLP()
net

MixMLP(
  (blk): Sequential(
    (0): Dense(None -> 3, Activation(relu))
    (1): Dense(None -> 4, Activation(relu))
  )
  (dense): Dense(None -> 5, linear)
)