In [1]:
#coding: utf-8
%matplotlib inline
import d2lzh as d2l
from mxnet import autograd, nd

In [2]:
# Mini-batch size; passed to the data loader here and to the training call later on.
batch_size = 256
# d2l helper that provides the Fashion-MNIST training and test data iterators.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

Downloading C:\Users\Administrator\.mxnet\datasets\fashion-mnist\train-images-idx3-ubyte.gz from https://apache-mxnet.s3.cn-north-1.amazonaws.com.cn/gluon/dataset/fashion-mnist/train-images-idx3-ubyte.gz...


Downloading C:\Users\Administrator\.mxnet\datasets\fashion-mnist\train-labels-idx1-ubyte.gz from https://apache-mxnet.s3.cn-north-1.amazonaws.com.cn/gluon/dataset/fashion-mnist/train-labels-idx1-ubyte.gz...


Downloading C:\Users\Administrator\.mxnet\datasets\fashion-mnist\t10k-images-idx3-ubyte.gz from https://apache-mxnet.s3.cn-north-1.amazonaws.com.cn/gluon/dataset/fashion-mnist/t10k-images-idx3-ubyte.gz...


Downloading C:\Users\Administrator\.mxnet\datasets\fashion-mnist\t10k-labels-idx1-ubyte.gz from https://apache-mxnet.s3.cn-north-1.amazonaws.com.cn/gluon/dataset/fashion-mnist/t10k-labels-idx1-ubyte.gz...


In [6]:
# Model dimensions: every example is flattened to 784 features (see the reshape in `net`)
# and mapped to 10 output scores, one per class.
num_inputs = 784
num_outputs = 10

# Weights are sampled from a normal distribution with scale 0.01; the bias starts at zero.
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)


In [7]:
# Allocate gradient buffers for the parameters so that backward() has somewhere to store
# their gradients.
W.attach_grad()
b.attach_grad()

In [8]:

# Demo: sum a 2x3 matrix along each axis. keepdims=True keeps the reduced axis with
# length 1, so the results have shapes 1x3 and 2x1.
X = nd.array([[1, 2, 3], [4, 5, 6]])
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)


(
 [[5. 7. 9.]]
 <NDArray 1x3 @cpu(0)>, 
 [[ 6.]
  [15.]]
 <NDArray 2x1 @cpu(0)>)

In [11]:
def softmax(X):
    """Apply a numerically stable softmax to every row of a 2-D array.

    Args:
        X: 2-D array of scores with one example per row. It must support
            ``max``/``sum`` with ``axis`` and ``keepdims``, ``exp()`` and
            broadcasting arithmetic (an MXNet NDArray does).

    Returns:
        An array of the same shape as ``X`` whose entries are non-negative
        and whose rows each sum to 1.
    """
    # Softmax is unchanged when a per-row constant is subtracted. Subtracting
    # the row maximum makes the largest exponent exp(0) = 1, so exp() can no
    # longer overflow to inf and produce NaN in the division below.
    shifted = X - X.max(axis=1, keepdims=True)
    X_exp = shifted.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition  # broadcasting divides each row by its own sum

In [12]:
# Sanity check on random input: each row of the result should sum to (approximately) 1.
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)

(
 [[0.32651287 0.2196676  0.10925111 0.2976461  0.04692229]
  [0.78728867 0.02220864 0.02097706 0.14454156 0.02498409]]
 <NDArray 2x5 @cpu(0)>, 
 [0.99999994 1.        ]
 <NDArray 2 @cpu(0)>)

In [13]:
def net(X):
    """Softmax regression model: flatten the input, apply W and b, then softmax.

    Args:
        X: batch of inputs; reshaped to ``(-1, num_inputs)`` before the
            linear map.

    Returns:
        The result of ``softmax`` applied to ``X_flat . W + b``.
    """
    flat = X.reshape((-1, num_inputs))
    logits = nd.dot(flat, W) + b
    return softmax(logits)

In [14]:
# Demo of nd.pick: for each row of y_hat, select the entry whose column index is given by
# the corresponding element of y (here row 0 -> column 0, row 1 -> column 2).
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2], dtype='int32')
nd.pick(y_hat, y)


[0.1 0.5]
<NDArray 2 @cpu(0)>

In [15]:
def cross_entropy(y_hat, y):
    """Per-example cross-entropy loss.

    Args:
        y_hat: predicted values, one row per example.
        y: index of the true class for each row of ``y_hat``.

    Returns:
        The negative log of the entry of each row of ``y_hat`` selected by ``y``.
    """
    true_class_prob = nd.pick(y_hat, y)
    return -true_class_prob.log()

In [16]:
def accuracy(y_hat, y):
    """Fraction of rows whose arg-max prediction equals the label.

    Args:
        y_hat: per-class scores, one row per example.
        y: labels; cast to float32 before being compared with the arg-max result.

    Returns:
        The mean of the element-wise equality, as a Python scalar.
    """
    predicted = y_hat.argmax(axis=1)
    labels = y.astype('float32')
    hits = predicted == labels
    return hits.mean().asscalar()

In [17]:
accuracy(y_hat, y)

0.5

In [22]:
# This function is also saved in the d2lzh package for later use. It will be improved
# step by step; its full implementation is described in the "Image Augmentation" section.
def evaluate_accuracy(data_iter, net):
    """Accuracy of ``net`` over every batch produced by ``data_iter``.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches.
        net: callable mapping a batch ``X`` to per-class scores.

    Returns:
        Number of correct arg-max predictions divided by the number of labels seen.
    """
    correct, total = 0.0, 0
    for features, labels in data_iter:
        labels = labels.astype('float32')
        predictions = net(features).argmax(axis=1)
        correct += (predictions == labels).sum().asscalar()
        total += labels.size
    return correct / total

In [25]:
# Accuracy before any training: W is still random, so with 10 classes this should be
# close to chance level.
evaluate_accuracy(test_iter, net)

0.1141

In [29]:
# Training hyperparameters: number of passes over the training iterator and the learning rate.
num_epochs, lr = 5, 0.1

def train_ch3(net,
              train_iter,
              test_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              trainer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: callable mapping a batch ``X`` to predictions ``y_hat``.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        loss: callable ``loss(y_hat, y)``; its result is summed over the batch.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``d2l.sgd`` or ``trainer.step``.
        params: parameters updated by ``d2l.sgd``; required when ``trainer`` is None.
        lr: learning rate for ``d2l.sgd``; required when ``trainer`` is None.
        trainer: optional object with a ``step(batch_size)`` method; when given,
            it is used instead of ``d2l.sgd``.

    Raises:
        ValueError: if ``trainer`` is None and ``params`` or ``lr`` is missing.
    """
    # Fail fast: without a trainer the update goes through d2l.sgd, which needs
    # both params and lr. Checking here avoids running a full forward/backward
    # pass only to fail with an obscure error inside the optimiser step.
    if trainer is None and (params is None or lr is None):
        raise ValueError(
            'train_ch3 requires either `trainer` or both `params` and `lr`')

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            # Record the forward pass so that backward() can compute gradients.
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                # Used in the "Concise Implementation of Softmax Regression" section.
                trainer.step(batch_size)
            # Labels are cast to float32 before being compared with the arg-max result.
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
# Train the from-scratch model. No trainer is passed, so train_ch3 updates [W, b]
# through d2l.sgd using learning rate lr.
train_ch3(net, 
          train_iter,
          test_iter,
          cross_entropy,
          num_epochs,
          batch_size,
          [W, b], lr)

epoch 1, loss 0.7885, train acc 0.746, test acc 0.803


epoch 2, loss 0.5725, train acc 0.813, test acc 0.824


epoch 3, loss 0.5280, train acc 0.824, test acc 0.827


epoch 4, loss 0.5053, train acc 0.830, test acc 0.836


epoch 5, loss 0.4891, train acc 0.835, test acc 0.840


In [31]:
# Take only the first mini-batch from the test iterator; X and y keep that batch
# after the break. NOTE(review): if test_iter yielded nothing, X and y would silently
# keep the values assigned in earlier cells.
for X, y in test_iter:
    break

# Map the true labels and the arg-max predictions through the d2l label helper
# (presumably class index -> text label; the results are concatenated as strings below).
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
# Each title puts the true label on the first line and the predicted label on the second.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

# Show the first nine images of the batch with their titles.
d2l.show_fashion_mnist(X[0:9], titles[0:9])

<Figure size 864x864 with 9 Axes>