[View in Colaboratory](https://colab.research.google.com/github/brucecmd/learn_gluon/blob/master/lenet.ipynb)

In [0]:
from mxnet.gluon import nn
import mxnet as mx
from mxnet.gluon import loss as gloss
from time import time
from mxnet.gluon import data as gdata
from mxnet import gluon, init, autograd, nd

In [0]:
def try_gpu():
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=ctx)
    except:
        ctx = mx.cpu()
    return ctx

In [0]:
net = nn.Sequential()
net.add(
nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
nn.MaxPool2D(pool_size=2, strides=2),
nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
nn.MaxPool2D(pool_size=2, strides=2),
nn.Dense(120, activation='sigmoid'),
nn.Dense(84, activation='sigmoid'),
nn.Dense(10)
)
ctx = try_gpu()
net.initialize(init.Normal(sigma=1), ctx=ctx) # 要注意的是，这里的sigma选太小的话，训不出结果来

In [0]:
mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)

In [0]:
batch_size = 256
transformer = gdata.vision.transforms.ToTensor()
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer), batch_size, shuffle=True)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer), batch_size, shuffle=True)

In [0]:
def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1)==y.astype('float32')).mean().asscalar()

  
def evaluate_accuracy(data_iter, net, ctx):
    acc = nd.array([0], ctx=ctx)
    for x, y in data_iter:
        x = x.as_in_context(ctx)
        y = y.as_in_context(ctx)
        acc += accuracy(net(x), y)
    return acc.asscalar() / len(data_iter)

In [0]:
loss_func = gloss.SoftmaxCrossEntropyLoss()

In [0]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':0.1})

In [30]:
epochs = 10
batch_size = 256

ctx = try_gpu()

for i in range(epochs):
    for data, label in train_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            y_hat = net(data)
            l = loss_func(y_hat, label)
        l.backward()
        trainer.step(batch_size)
    train_acc = evaluate_accuracy(train_iter, net, ctx)
    test_acc = evaluate_accuracy(test_iter, net, ctx)
    print('epoch[%d], train acc[%f], test acc[%f]'%(i, train_acc, test_acc))

epoch[0], train acc[0.689412], test acc[0.689844]
epoch[1], train acc[0.719686], test acc[0.715723]
epoch[2], train acc[0.748149], test acc[0.740625]
epoch[3], train acc[0.755979], test acc[0.747363]
epoch[4], train acc[0.757757], test acc[0.748047]
epoch[5], train acc[0.773521], test acc[0.760645]
epoch[6], train acc[0.781444], test acc[0.777930]
epoch[7], train acc[0.786830], test acc[0.780762]
epoch[8], train acc[0.789888], test acc[0.785547]
epoch[9], train acc[0.795883], test acc[0.791309]
