# with gluon

In [2]:
from mxnet import nd
from mxnet.gluon import nn



In [3]:
# Build a two-layer perceptron with the Sequential container.
net = nn.Sequential()
with net.name_scope():
    # Layers are created inside the name scope, then appended in order:
    # a 256-unit hidden layer with ReLU followed by a 10-unit output layer.
    for layer in (nn.Dense(256, activation="relu"), nn.Dense(10)):
        net.add(layer)

print(net)

Sequential(
  (0): Dense(256, Activation(relu))
  (1): Dense(10, linear)
)


## Use nn.Block to define an MLP

In [4]:
class MLP(nn.Block):
    """Two-layer perceptron written as a custom ``nn.Block``.

    The block owns two ``nn.Dense`` layers (256 and 10 units). The ReLU
    between them is applied in ``forward`` rather than being baked into the
    first layer.
    """

    def __init__(self, **kwargs):
        """Create the two dense layers; ``kwargs`` are forwarded to ``nn.Block``."""
        super().__init__(**kwargs)
        # Creating the children inside the name scope gives them this block's prefix.
        with self.name_scope():
            self.dense0 = nn.Dense(256)
            self.dense1 = nn.Dense(10)

    def forward(self, X):
        """Return the output of ``dense1`` applied to ``relu(dense0(X))``.

        No activation is applied to the final layer's output.
        """
        hidden = nd.relu(self.dense0(X))
        return self.dense1(hidden)

In [5]:
# Instantiate the custom block; the bare name on the last line displays its repr.
my_net = MLP()
my_net

MLP(
  (dense0): Dense(256, linear)
  (dense1): Dense(10, linear)
)

In [6]:
# Random input of shape (4, 20).
X = nd.random.uniform(shape=(4,20))
# Initialize the network's parameters, then run a forward pass on X.
my_net.initialize()
y = my_net(X)
# Display the result (the recorded output is a 4x10 NDArray).
y


[[ 0.05502447  0.01093244 -0.05812225 -0.00867474  0.00780752 -0.03732029
  -0.11888048 -0.01667178 -0.12706244 -0.00605519]
 [ 0.05254333 -0.03761618 -0.03303654 -0.06370584  0.02936437 -0.04790818
  -0.07402188  0.00388384 -0.09476319  0.00247342]
 [ 0.03847572 -0.01801044 -0.02936447 -0.04202728  0.00755377 -0.06616984
  -0.08015118  0.04540668 -0.08034274  0.00180145]
 [ 0.03042224 -0.04749024 -0.00121015 -0.08124933  0.03479041 -0.06163511
  -0.10677548  0.04019741 -0.1076465   0.01437488]]
<NDArray 4x10 @cpu(0)>

In [7]:
# Display the Dense class object to see which module it is defined in.
nn.Dense

mxnet.gluon.nn.basic_layers.Dense

Print the layer's name

In [8]:
# Name of the first dense layer when the block uses its auto-generated prefix.
default_layer_name = my_net.dense0.name
print('default prefix:', default_layer_name)

# Same layer name when an explicit prefix is supplied to the block.
net3 = MLP(prefix='another_mlp_')
custom_layer_name = net3.dense0.name
print('customized prefix:', custom_layer_name)

default prefix: mlp0_dense0
customized prefix: another_mlp_dense0


nn.Block到底是什么东西？
在gluon里，nn.Block是一个一般化的部件。整个神经网络可以是一个nn.Block，单个层也是一个nn.Block。我们可以（近似）无限地嵌套nn.Block来构建新的nn.Block。

nn.Block主要提供以下功能：

存储参数
描述forward如何执行
自动求导

# http://zh.gluon.ai/chapter_gluon-basics/block.html

# CNN

In [12]:
from mxnet import nd

# Input/output data layout is batch x channel x height x width; here batch and channel are both 1.
# Weight layout is output_channels x in_channels x height x width; here both channel counts are 1.
w = nd.arange(9).reshape((1, 1, 3, 3))
b = nd.array([1])
data = nd.arange(16).reshape((1, 1, 4, 4))
# num_filter is the number of OUTPUT channels, i.e. axis 0 of the weight tensor.
# Axis 1 is in_channels; the two only coincide in this example because both are 1.
out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[0], stride=(2, 2), pad=(1, 1))

print('input:', data, '\n\nweight:', w, '\n\nbias:', b, '\n\noutput:', out)


input: 
[[[[  0.   1.   2.   3.]
   [  4.   5.   6.   7.]
   [  8.   9.  10.  11.]
   [ 12.  13.  14.  15.]]]]
<NDArray 1x1x4x4 @cpu(0)> 

weight: 
[[[[ 0.  1.  2.]
   [ 3.  4.  5.]
   [ 6.  7.  8.]]]]
<NDArray 1x1x3x3 @cpu(0)> 

bias: 
[ 1.]
<NDArray 1 @cpu(0)> 

output: 
[[[[  74.  155.]
   [ 280.  439.]]]]
<NDArray 1x1x2x2 @cpu(0)>


In [13]:
# Trailing two axes of the weight tensor: the kernel's (height, width).
w.shape[2:]

(3, 3)

In [14]:
# Axis 1 of the weight tensor (in_channels in the output_channels x in_channels x height x width layout).
w.shape[1]

1

In [15]:
# Strided, padded convolution of `data` with kernel `w` and bias `b`.
# num_filter is the number of OUTPUT channels, i.e. axis 0 of the weight tensor
# (axis 1 is in_channels; they only coincide here because both are 1).
out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[0],
                     stride=(2,2), pad=(1,1))

print('input:', data, '\n\nweight:', w, '\n\nbias:', b, '\n\noutput:', out)

input: 
[[[[  0.   1.   2.   3.]
   [  4.   5.   6.   7.]
   [  8.   9.  10.  11.]
   [ 12.  13.  14.  15.]]]]
<NDArray 1x1x4x4 @cpu(0)> 

weight: 
[[[[ 0.  1.  2.]
   [ 3.  4.  5.]
   [ 6.  7.  8.]]]]
<NDArray 1x1x3x3 @cpu(0)> 

bias: 
[ 1.]
<NDArray 1 @cpu(0)> 

output: 
[[[[  74.  155.]
   [ 280.  439.]]]]
<NDArray 1x1x2x2 @cpu(0)>


# start

In [22]:
from mxnet import gluon
def transform(data, label):
    """Convert one sample to float32, dividing the data values by 255.

    Returns a ``(data, label)`` tuple: ``data`` cast to float32 and divided
    by 255, and ``label`` cast to float32.
    """
    scaled = data.astype('float32') / 255
    return scaled, label.astype('float32')

# Load the FashionMNIST training split first, then the test split, applying
# `transform` to every sample in both.
mnist_train, mnist_test = (
    gluon.data.vision.FashionMNIST(train=is_train, transform=transform)
    for is_train in (True, False)
)

In [29]:
# Shape of the first training image (the recorded output is (28, 28, 1)).
mnist_train[0][0].shape

(28, 28, 1)

In [16]:
import mxnet as mx

# Probe for a usable GPU by allocating a tiny array on it; fall back to the CPU
# if MXNet reports an error.
try:
    ctx = mx.gpu()
    _ = nd.zeros((1,), ctx=ctx)
except mx.base.MXNetError:
    # Catch only MXNet's own error type so that KeyboardInterrupt and unrelated
    # bugs are not silently swallowed, as a bare `except:` would do.
    ctx = mx.cpu()
ctx

gpu(0)

In [31]:
weight_scale = .01


def _gaussian(shape):
    """Return an ``nd.random_normal`` tensor of ``shape`` with scale ``weight_scale`` on ``ctx``."""
    return nd.random_normal(shape=shape, scale=weight_scale, ctx=ctx)


def _zero_bias(size):
    """Return an all-zero vector of length ``size`` on ``ctx``."""
    return nd.zeros(size, ctx=ctx)


# First convolution: 20 output channels, 1 input channel, 5x5 kernel.
W1 = _gaussian((20, 1, 5, 5))
b1 = _zero_bias(W1.shape[0])

# Second convolution: 50 output channels, 20 input channels, 3x3 kernel.
W2 = _gaussian((50, 20, 3, 3))
b2 = _zero_bias(W2.shape[0])

# Dense layer with 128 outputs.
# NOTE(review): the 1250 inputs presumably match the flattened output of the
# convolution stack, which is defined outside this cell -- confirm.
W3 = _gaussian((1250, 128))
b3 = _zero_bias(W3.shape[1])

# Output layer: 10 outputs, fed by the 128 outputs of the previous layer.
W4 = _gaussian((W3.shape[1], 10))
b4 = _zero_bias(W4.shape[1])

# Collect every parameter and attach a gradient buffer to each one.
params = [W1, b1, W2, b2, W3, b3, W4, b4]
for param in params:
    param.attach_grad()

In [32]:
import sys
# Make the directory below importable (the next cell imports `utils`).
# NOTE(review): this is a machine-specific absolute path, so the notebook will not
# run elsewhere as written -- consider a path relative to the notebook instead.
sys.path.append("/mnt/D/Ubuntu/PycharmProjects/deeplearning_notebook/mxnet_study")

In [33]:
import utils

## VGG架构
VGG的一个关键是使用很多有着相对小的kernel（3×3）的卷积层然后接上一个池化层，之后再将这个模块重复多次。下面我们先定义一个这样的块：

In [17]:
def vgg_block(num_convs, channels):
    """Build one VGG-style block as an ``nn.Sequential``.

    The block stacks ``num_convs`` convolution layers, each with ``channels``
    output channels, a 3x3 kernel, padding 1 and ReLU activation, followed by
    a single 2x2 max-pooling layer with stride 2.
    """
    block = nn.Sequential()
    for _ in range(num_convs):
        conv = nn.Conv2D(channels=channels, kernel_size=3,
                         padding=1, activation='relu')
        block.add(conv)
    pool = nn.MaxPool2D(pool_size=2, strides=2)
    block.add(pool)
    return block

<module 'mxnet.gluon.nn' from '/mnt/D/Ubuntu/package/anaconda3/lib/python3.6/site-packages/mxnet/gluon/nn/__init__.py'>