In [1]:
from mxnet.gluon import nn

# AlexNet-style layer stack ending in a 10-unit output layer.
net = nn.Sequential()
with net.name_scope():
    # Stage 1: 11x11 convolution with stride 4, then 3x3 max pooling.
    net.add(
        nn.Conv2D(channels=96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
    )
    # Stage 2: 5x5 convolution (padding 2), then 3x3 max pooling.
    net.add(
        nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
    )
    # Stage 3: three consecutive 3x3 convolutions (padding 1), then max pooling.
    net.add(
        nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
    )
    # Stage 4: flatten, then the first fully connected layer with dropout.
    net.add(
        nn.Flatten(),
        nn.Dense(4096, activation='relu'),
        nn.Dropout(.5),
    )
    # Stage 5: second fully connected layer with dropout.
    net.add(
        nn.Dense(4096, activation='relu'),
        nn.Dropout(.5),
    )
    # Stage 6: output layer with 10 units and no activation.
    net.add(nn.Dense(10))
print(net)

  from ._conv import register_converters as _register_converters


Sequential(
  (0): Conv2D(None -> 96, kernel_size=(11, 11), stride=(4, 4))
  (1): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (2): Conv2D(None -> 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (4): Conv2D(None -> 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): Conv2D(None -> 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): Conv2D(None -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (8): Flatten
  (9): Dense(None -> 4096, Activation(relu))
  (10): Dropout(p = 0.5, axes=())
  (11): Dense(None -> 4096, Activation(relu))
  (12): Dropout(p = 0.5, axes=())
  (13): Dense(None -> 10, linear)
)


In [3]:
# Load the data
import sys
sys.path.append('..')
import utils
from mxnet import autograd
from mxnet import gluon
from mxnet import nd
from mxnet import image

# Number of samples per mini-batch handed to the data loaders.
batch_size = 256
# Width and height (in pixels) that `transform` resizes every image to.
resize = 224
def transform(data, label):
    """Resize one sample and convert it to a scaled float32 array.

    The image is resized to ``resize`` x ``resize`` (module-level setting),
    cast to float32, its last axis is moved to the front, and the values are
    divided by 255. The label is cast to float32.

    NOTE(review): the original author's debug notes recorded a per-sample
    input shape of (28, 28, 1), i.e. height x width x channel, which would
    make the result channel x height x width -- confirm against the dataset.

    Args:
        data: image array for a single sample.
        label: label for the same sample.

    Returns:
        A ``(image, label)`` tuple, both float32.
    """
    resized = image.imresize(data, resize, resize)
    # Move the trailing axis to the front before scaling.
    channel_first = nd.transpose(resized.astype('float32'), (2, 0, 1))
    return channel_first / 255, label.astype('float32')
# FashionMNIST train/test splits; `transform` is applied to every sample.
mnist_train = gluon.data.vision.FashionMNIST(
    train=True,
    transform=transform,
)
mnist_test = gluon.data.vision.FashionMNIST(
    train=False,
    transform=transform,
)

# Shuffle only the training split; the test split keeps its original order.
train_data = gluon.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True)
test_data = gluon.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False)


In [4]:
# Training
import time
from mxnet import init
from mxnet import gluon

# Device context chosen by the project helper
# (presumably a GPU when one is available -- verify in utils.try_gpu).
ctx = utils.try_gpu()

# Xavier initialization of the network parameters on the chosen context.
net.initialize(init=init.Xavier(), ctx=ctx)

# Loss applied to the network's 10-unit output.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Plain SGD over all network parameters.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 0.01},
)

# Print the network once, right after the first forward pass, so the printed
# layer descriptions show concrete input sizes instead of `None`.
print_net = True

# Number of full passes over the training set.
num_epochs = 10

for epoch in range(num_epochs):
    time_start = time.time()
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Move the batch to the training device once and reuse it below.
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        if print_net:
            print(net)
            print_net = False
        loss.backward()
        # Normalize the gradient by the number of samples actually in this
        # batch: the last batch of an epoch can be smaller than `batch_size`,
        # and stepping with the nominal size would under-scale its update.
        trainer.step(data.shape[0])

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # Per-epoch averages are taken over the number of batches.
    num_batches = len(train_data)
    test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    print("Epoch %d. Loss: %.4f, Train acc %.4f, Test acc %.4f, Time %.0f sec" % (
        epoch, train_loss / num_batches,
        train_acc / num_batches, test_acc, time.time() - time_start))

Sequential(
  (0): Conv2D(1 -> 96, kernel_size=(11, 11), stride=(4, 4))
  (1): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (2): Conv2D(96 -> 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (4): Conv2D(256 -> 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): Conv2D(384 -> 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): Conv2D(384 -> 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (8): Flatten
  (9): Dense(6400 -> 4096, Activation(relu))
  (10): Dropout(p = 0.5, axes=())
  (11): Dense(4096 -> 4096, Activation(relu))
  (12): Dropout(p = 0.5, axes=())
  (13): Dense(4096 -> 10, linear)
)


KeyboardInterrupt: 