In [1]:
import d2lzh as d2l
import mxnet as mx
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn
import time

In [4]:
# Small convolutional classifier: two conv + max-pool stages followed by
# three fully connected layers, the last of which has 10 outputs.
net = nn.Sequential()
# Stage 1: 6 feature maps from a 5x5 convolution, then 2x2 pooling with stride 2.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
# Stage 2: 16 feature maps from a 5x5 convolution, then 2x2 pooling with stride 2.
net.add(nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
# Fully connected head: 120 -> 84 -> 10 units; only the hidden layers use sigmoid.
net.add(nn.Dense(120, activation='sigmoid'))
net.add(nn.Dense(84, activation='sigmoid'))
net.add(nn.Dense(10))

In [6]:
# Shape check: push one random sample of shape (1, 1, 28, 28) through the
# network one layer at a time and print the output shape after each layer.
X = nd.random.uniform(shape=(1, 1, 28, 28))
# Default initialisation; the training cell later re-initialises with force_reinit=True.
net.initialize()
for layer in net:
    # Feed the previous layer's output forward so X always holds the latest activation.
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

conv0 output shape:	 (1, 6, 24, 24)
pool0 output shape:	 (1, 6, 12, 12)
conv1 output shape:	 (1, 16, 8, 8)
pool1 output shape:	 (1, 16, 4, 4)
dense0 output shape:	 (1, 120)
dense1 output shape:	 (1, 84)
dense2 output shape:	 (1, 10)


  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)


In [7]:
# Mini-batch size; used to build the data iterators here and passed to train_ch5 later.
batch_size = 256
# d2l helper that returns the training and test iterators
# (presumably Fashion-MNIST, going by its name -- verify in d2lzh).
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
def try_gpu():
    """Return the GPU context if an array can be created on it, otherwise the CPU context.

    Returns:
        ``mx.gpu()`` when creating a one-element array on that context does not
        raise ``mx.base.MXNetError``; ``mx.cpu()`` otherwise.
    """
    try:
        device = mx.gpu()
        # Creating a tiny array on the device serves as the availability probe;
        # an MXNetError from either call triggers the CPU fallback below.
        nd.zeros((1,), ctx=device)
        return device
    except mx.base.MXNetError:
        return mx.cpu()
# Select the compute device once; later cells pass this ctx to initialisation and training.
ctx = try_gpu()
# Bare expression so the notebook displays the chosen context as the cell output.
ctx

gpu(0)

In [8]:
def evaluate_accuracy(data_iter, net, ctx):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(features, labels)`` batches.
        net: Callable model; its output is reduced with ``argmax(axis=1)``
            to obtain the predicted class per sample.
        ctx: Context on which the batches and the running count are placed.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    # The running count of correct predictions is kept as an array on ctx.
    correct = nd.array([0], ctx=ctx)
    total = 0
    for features, labels in data_iter:
        # If ctx denotes a GPU and its memory, copy the batch into that memory.
        features = features.as_in_context(ctx)
        # Labels are cast to float32 before the comparison -- presumably so
        # they share a dtype with the argmax result.
        labels = labels.as_in_context(ctx).astype('float32')
        predictions = net(features).argmax(axis=1)
        correct += (predictions == labels).sum()
        total += labels.size
    return correct.asscalar() / total

In [15]:
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics after each epoch.

    Args:
        net: Model to train; called on each feature batch.
        train_iter: Iterable yielding ``(features, labels)`` training batches.
        test_iter: Iterable passed to ``evaluate_accuracy`` after each epoch.
        batch_size: Value handed to ``trainer.step`` on every iteration.
        trainer: Optimiser wrapper whose ``step`` method updates the parameters.
        ctx: Context onto which every batch is moved.
        num_epochs: Number of passes over ``train_iter``.

    Returns:
        None. Progress is reported with ``print`` only.
    """
    print('training on', ctx)
    criterion = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        # Per-epoch accumulators: summed loss, correct predictions, samples seen.
        loss_total = 0.0
        correct_total = 0.0
        seen = 0
        start = time.time()
        for features, labels in train_iter:
            features = features.as_in_context(ctx)
            labels = labels.as_in_context(ctx)
            # Record the forward pass so backward() can compute gradients.
            with autograd.record():
                outputs = net(features)
                # Loss summed over the batch (not averaged).
                batch_loss = criterion(outputs, labels).sum()
            batch_loss.backward()
            trainer.step(batch_size)
            # Cast labels to float32 before comparing with the argmax result.
            labels = labels.astype('float32')
            loss_total += batch_loss.asscalar()
            hits = (outputs.argmax(axis=1) == labels).sum()
            correct_total += hits.asscalar()
            seen += labels.size
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        # The reported time covers both the training pass and the test evaluation.
        elapsed = time.time() - start
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, loss_total / seen, correct_total / seen, test_acc, elapsed))

In [16]:
# Hyperparameters for this run: SGD learning rate and number of epochs.
lr = 0.9
num_epochs = 10
# The network was already initialised for the shape check, so force a fresh
# Xavier initialisation with the parameters placed on ctx.
net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)
# Plain SGD over all of the network's parameters.
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        optimizer_params={'learning_rate': lr})
train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 2.3195, train acc 0.101, test acc 0.100, time 6.0 sec
epoch 2, loss 1.5665, train acc 0.390, test acc 0.610, time 5.8 sec
epoch 3, loss 0.8967, train acc 0.648, test acc 0.713, time 5.8 sec
epoch 4, loss 0.7202, train acc 0.716, test acc 0.743, time 5.7 sec
epoch 5, loss 0.6409, train acc 0.745, test acc 0.763, time 5.8 sec
epoch 6, loss 0.5889, train acc 0.766, test acc 0.789, time 5.7 sec
epoch 7, loss 0.5382, train acc 0.787, test acc 0.806, time 5.7 sec
epoch 8, loss 0.5097, train acc 0.801, test acc 0.822, time 5.8 sec
epoch 9, loss 0.4815, train acc 0.814, test acc 0.836, time 5.7 sec
epoch 10, loss 0.4615, train acc 0.824, test acc 0.834, time 5.8 sec
