In [1]:
# 批量归一化 — 使用Gluon
# http://zh.gluon.ai/chapter_convolutional-neural-networks/batch-norm-gluon.html

# 定义模型并添加批量归一化层
from mxnet.gluon import nn

# Convolutional classifier with a batch-normalization layer inserted between
# each convolution and its ReLU activation.
net = nn.Sequential()
with net.name_scope():
    # Layers are constructed inside the name scope, in forward order, and
    # then registered one by one on the container.
    layers = [
        # First convolutional stage
        nn.Conv2D(channels=20, kernel_size=5),
        # Batch normalization along axis 1 (the channel axis under Conv2D's
        # default NCHW layout)
        nn.BatchNorm(axis=1),
        nn.Activation(activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # Second convolutional stage, again with batch normalization
        nn.Conv2D(channels=50, kernel_size=3),
        nn.BatchNorm(axis=1),
        nn.Activation(activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        # First fully connected layer. The activation is a separate layer;
        # no BatchNorm is placed between them (the notebook had that line
        # commented out).
        nn.Dense(128, activation=None),
        nn.Activation(activation='relu'),
        # Second fully connected layer: 10 output units
        nn.Dense(10),
    ]
    for layer in layers:
        net.add(layer)

  from ._conv import register_converters as _register_converters


In [2]:
# 模型训练
import sys
sys.path.append('..')
import utils
from mxnet import autograd
from mxnet import gluon
from mxnet import nd

# Mini-batch size used when building the data loaders below.
batch_size = 256

# Device context chosen by the project helper (presumably a GPU when one is
# available, otherwise CPU — see utils.try_gpu to confirm); the network's
# parameters are then initialized on that context.
ctx = utils.try_gpu()
net.initialize(ctx=ctx)

def transform(data, label):
    """Prepare a single dataset sample for the network.

    The image is cast to float32, its axes are permuted with order
    (2, 0, 1) so that the input's last axis becomes the output's first axis
    (height x width x channel -> channel x height x width, assuming the
    dataset yields HWC images — TODO confirm), and the values are divided
    by 255.

    Returns:
        A tuple ``(image, label)`` where both elements are float32.
    """
    pixels = data.astype('float32')
    channels_first = nd.transpose(pixels, (2, 0, 1))
    return channels_first / 255, label.astype('float32')
# Fashion-MNIST train and test splits; `transform` is passed to the dataset
# so it is applied to the samples it serves.
mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

# Mini-batch iterators: only the training loader shuffles.
train_data = gluon.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True)
test_data = gluon.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False)

# Softmax cross-entropy loss, optimized with SGD at learning rate 0.2 over
# all parameters collected from the network.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.2})


# Train for 10 epochs. After each epoch, print the mean per-batch training
# loss, the mean per-batch training accuracy, and the test-set accuracy.
for epoch in range(10):
    # Running sums over the batches of this epoch.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Move the batch to the device that holds the network parameters.
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        # Record the forward pass so gradients can be computed.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Trainer.step rescales the gradient by 1/<argument>. Pass the actual
        # number of samples in this batch rather than the nominal batch size:
        # the last batch of an epoch can be smaller, and dividing by the
        # nominal size would under-scale that update.
        trainer.step(data.shape[0])

        # asscalar() copies the value to the host, which waits for the
        # computation of this batch to finish.
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    # len(train_data) is the number of batches per epoch.
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

Epoch 0. Loss: 0.594382, Train acc 0.779682, Test acc 0.866797
Epoch 1. Loss: 0.354826, Train acc 0.868562, Test acc 0.880273
Epoch 2. Loss: 0.312137, Train acc 0.884685, Test acc 0.890039
Epoch 3. Loss: 0.279128, Train acc 0.896088, Test acc 0.898047
Epoch 4. Loss: 0.260331, Train acc 0.902554, Test acc 0.900195
Epoch 5. Loss: 0.239962, Train acc 0.910023, Test acc 0.900879
Epoch 6. Loss: 0.224845, Train acc 0.918041, Test acc 0.906152
Epoch 7. Loss: 0.212829, Train acc 0.920107, Test acc 0.896191
Epoch 8. Loss: 0.200879, Train acc 0.924618, Test acc 0.912012
Epoch 9. Loss: 0.189852, Train acc 0.928901, Test acc 0.908008
