[View in Colaboratory](https://colab.research.google.com/github/brucecmd/learn_gluon/blob/master/softmax_classification.ipynb)

In [3]:
!pip install mxnet-cu80

Collecting mxnet-cu80
[?25l  Downloading https://files.pythonhosted.org/packages/f6/6c/566a1d4b8b1005b7d9ccfaecd7632f6dca596246f6657827b2d4e97c72c7/mxnet_cu80-1.2.1-py2.py3-none-manylinux1_x86_64.whl (299.1MB)
[K    100% |████████████████████████████████| 299.1MB 76kB/s 
Collecting graphviz<0.9.0,>=0.8.1 (from mxnet-cu80)
  Downloading https://files.pythonhosted.org/packages/53/39/4ab213673844e0c004bed8a0781a0721a3f6bb23eb8854ee75c236428892/graphviz-0.8.4-py2.py3-none-any.whl
Installing collected packages: graphviz, mxnet-cu80
Successfully installed graphviz-0.8.4 mxnet-cu80-1.2.1


In [0]:
from mxnet.gluon import data as gdata

In [0]:
mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)

In [0]:
transformer = gdata.vision.transforms.ToTensor()

In [0]:
batch_size = 256
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer), batch_size, shuffle=True)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer), batch_size, shuffle=True)

In [0]:
from mxnet import nd, autograd

In [0]:
num_inputs = 28 * 28
num_outputs = 10
w = nd.random_normal(0,1,shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)
params = [w,b]
for p in params:
    p.attach_grad()

In [0]:
def softmax(x):
    exp_value = nd.exp(x)
    partition = exp_value.sum(axis=1, keepdims=True)
    return exp_value / partition

In [0]:
def net(x):
    return softmax(nd.dot(x.reshape(-1,num_inputs),w) + b)

In [0]:
def loss_func(y, y_hat):
    return - nd.pick(y_hat.log(), y)

In [0]:
from mxnet import gluon

In [0]:
def sgd(params, batch_size, lr):
    for p in params:
        p[:] -= p.grad * lr / batch_size

In [79]:
def accuracy(y, y_hat):
    return (y_hat.argmax(axis=1)==y.astype('float32')).mean().asscalar()
    
def evaluate_accuracy(data_iter, net):
    acc = 0
    for feature, data in data_iter:
        acc += accuracy(data, net(feature))
    return acc / len(data_iter)
  
epochs = 10
batch_size = 256
lr = 0.1
for i in range(epochs):
    for feature, label in train_iter:
        with autograd.record():
            y_hat = net(feature)
            l = loss_func(label, y_hat)
            #print(label.shape)
            #print(y_hat.shape)
        l.backward()
        sgd(params, batch_size, lr)
    test_acc = evaluate_accuracy(test_iter, net)
    train_acc = evaluate_accuracy(train_iter,net)
    print('epoch %d, train acc[%f], test acc [%f]'%(i, train_acc, test_acc))

epoch 0, train acc[0.583023], test acc [0.584180]
epoch 1, train acc[0.645479], test acc [0.648926]
epoch 2, train acc[0.685888], test acc [0.686426]
epoch 3, train acc[0.707829], test acc [0.711621]
epoch 4, train acc[0.722606], test acc [0.721875]
epoch 5, train acc[0.733721], test acc [0.733008]
epoch 6, train acc[0.744243], test acc [0.747266]
epoch 7, train acc[0.749540], test acc [0.751660]
epoch 8, train acc[0.757691], test acc [0.762988]
epoch 9, train acc[0.761015], test acc [0.763867]
