In [None]:
from mxnet.gluon import nn
import mxnet as mx
import mxnet.ndarray as nd
net = nn.Sequential()
net.add(nn.Dense(1))

net.initialize(ctx=mx.gpu())

In [None]:
data = nd.random.uniform(shape=[3,2], ctx=mx.gpu())
print (net(data))
print (net)
print (net[0].weight.data())

输入为单通道：

In [None]:
from mxnet import nd
w = nd.arange(4).reshape((1,1,2,2))
b = nd.array([1])

data = nd.arange(9).reshape((1,1,3,3))
out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[1])
print ('input:',data,'\n\nweight:',w,'\n\nbias:', b, '\n\noutput:', out)

输入为多通道：

In [None]:
w = nd.arange(24).reshape((3,2,2,2))
data = nd.arange(18).reshape((1,2,3,3))
b = nd.array([1,2,3])

out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[0])

print('input:', data, '\n\nweight:', w, '\n\nbias:', b, '\n\noutput:', out)

In [None]:
w = nd.arange(24).reshape((2,3,2,2))
print('weight:', w)

In [None]:
data = nd.arange(18).reshape((1,2,3,3))

max_pool = nd.Pooling(data=data, pool_type="max", kernel=(2,2))


print('data:', data, '\n\nmax pooling:', max_pool)

## 获取数据

In [None]:
import sys
sys.path.append('..')
from utils import load_data_fashion_mnist

batch_size = 256
train_data, test_data = load_data_fashion_mnist(batch_size)

## 定义模型

In [None]:
import mxnet as mx

try:
    ctx = mx.gpu()
    _ = nd.zeros((1,), ctx=ctx)
except:
    ctx = mx.cpu()
ctx

LeNet:

![lenet](./lenet.jpeg)

In [None]:
weight_scale = .01

# output channels = 20, kernel = (5,5)
W1 = nd.random_normal(shape=(20,1,5,5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)

# output channels = 50, kernel = (3,3)
W2 = nd.random_normal(shape=(50,20,3,3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)

# output dim = 128
W3 = nd.random_normal(shape=(1250, 128), scale=weight_scale, ctx=ctx)
b3 = nd.zeros(W3.shape[1], ctx=ctx)

# output dim = 10
W4 = nd.random_normal(shape=(W3.shape[1], 10), scale=weight_scale, ctx=ctx)
b4 = nd.zeros(W4.shape[1], ctx=ctx)

params = [W1, b1, W2, b2, W3, b3, W4, b4]
for param in params:
    param.attach_grad()

In [None]:
print (param)

In [None]:
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # 第一层卷积
    h1_conv = nd.Convolution(
        data=X, weight=W1, bias=b1, kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(
        data=h1_activation, pool_type="max", kernel=(2,2), stride=(2,2))
    # 第二层卷积
    h2_conv = nd.Convolution(
        data=h1, weight=W2, bias=b2, kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2,2), stride=(2,2))
    h2 = nd.flatten(h2)
    # 第一层全连接
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # 第二层全连接
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear

In [None]:
for data, _ in train_data:
    out = net(data, verbose=True)
    print (out.max(axis=1)) #out.argmax(axis=1)
    break

## 训练

In [None]:
from mxnet import autograd as autograd
from utils import SGD, accuracy, evaluate_accuracy
from mxnet import gluon

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

learning_rate = .2

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        SGD(params, learning_rate/batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    test_acc = evaluate_accuracy(test_data, net, ctx)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

# GLUON

In [None]:
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(128, activation="relu"),
        nn.Dense(10)
    )

In [None]:
from mxnet import gluon
import sys
sys.path.append('..')
import utils

# 初始化
ctx = utils.try_gpu()
net.initialize(ctx=ctx)
print('initialize weight on', ctx)

# 获取数据
batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

# 训练
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.5})
utils.train(train_data, test_data, net, loss,
            trainer, ctx, num_epochs=5)

### 复杂网络模型

In [91]:
from mxnet.gluon import nn
from mxnet import init

class BaseMLP(nn.Block):
    def __init__(self, **kwargs):
        super(BaseMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        with self.net.name_scope():
            self.net.add(nn.Dense(256, activation='relu'))
            self.net.add(nn.Dense(128, activation='relu'))
            self.dense = nn.Dense(128)
    def forward(self, x):
        return nd.relu(self.dense(self.net(x)))
        #网络形状应该由forward推到出来的把？
print (BaseMLP())
abc_mlp = nn.Sequential()
abc_mlp.add(BaseMLP())
abc_mlp.add(nn.Dense(10))

print (abc_mlp)
print (params)

BaseMLP(
  (net): Sequential(
    (0): Dense(None -> 256, Activation(relu))
    (1): Dense(None -> 128, Activation(relu))
  )
  (dense): Dense(None -> 128, linear)
)
Sequential(
  (0): BaseMLP(
    (net): Sequential(
      (0): Dense(None -> 256, Activation(relu))
      (1): Dense(None -> 128, Activation(relu))
    )
    (dense): Dense(None -> 128, linear)
  )
  (1): Dense(None -> 10, linear)
)
sequential43_ (
  Parameter sequential44_dense0_weight (shape=(256, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential44_dense0_bias (shape=(256,), dtype=<class 'numpy.float32'>)
  Parameter sequential44_dense1_weight (shape=(128, 256), dtype=<class 'numpy.float32'>)
  Parameter sequential44_dense1_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter sequential44_dense2_weight (shape=(128, 128), dtype=<class 'numpy.float32'>)
  Parameter sequential44_dense2_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter dense12_weight (shape=(10, 128), dtype=<class 'numpy.float32

In [92]:
x = nd.random.uniform(shape=(3,5))

abc_mlp.initialize(init=init.One())#不执行就报错

params = abc_mlp.collect_params()
print(params)

abc_mlp(x)
print(params)#不给数据，不出最终的参数规模(shape)
#params.initial(init=init)

sequential46_ (
  Parameter sequential47_dense0_weight (shape=(256, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense0_bias (shape=(256,), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense1_weight (shape=(128, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense1_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense2_weight (shape=(128, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense2_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter dense13_weight (shape=(10, 0), dtype=<class 'numpy.float32'>)
  Parameter dense13_bias (shape=(10,), dtype=<class 'numpy.float32'>)
)
sequential46_ (
  Parameter sequential47_dense0_weight (shape=(256, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense0_bias (shape=(256,), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense1_weight (shape=(128, 256), dtype=<class 'numpy.float32'>)
  Parameter sequential47_dense1_bias (shape=(128,), d