# 多类逻辑回归 — 从0开始

In [54]:
from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
import numpy as np
from mxnet import gluon

# Device context that every data/label batch is moved to before it is used.
ctx = mx.cpu()

In [40]:
def transform(data, label):
    """
    Per-sample preprocessing: cast to float32 and rescale the data.

    :param data: array-like exposing ``astype``; its values are divided by 255
    :param label: array-like exposing ``astype``
    :return: tuple ``(data / 255 as float32, label as float32)``
    """
    scaled = data.astype('float32') / 255
    label_f32 = label.astype('float32')
    return scaled, label_f32

# Fashion-MNIST training and test splits; `transform` is handed to the dataset
# so samples come back as float32 (see the sample printed in the next cell).
mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

In [41]:
# Look at the first training sample: the image shape and its label.
data, label = mnist_train[0]
print(data.shape)
print(label)

(28, 28, 1)
2.0


In [42]:
# Mini-batch loaders: the training set is shuffled, the test set keeps its order.
batch_size = 256
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

In [43]:
# 28 * 28 * 1 values per image once it is flattened (sample shape is (28, 28, 1)).
num_inputs = 784
# Number of classes; matches the depth used for the one-hot labels.
num_outputs = 10

# Weight matrix and bias vector of the linear model, drawn from a random normal.
W = nd.random_normal(shape=(num_inputs, num_outputs))
b = nd.random_normal(shape=num_outputs)

# Everything the optimiser has to update.
params = [W, b]

In [44]:
# Give every parameter a gradient buffer; SGD later reads it back as `param.grad`.
for param in params:
    param.attach_grad()

In [45]:
def softmax(y_linear):
    """
    Convert a batch of raw scores into row-wise probability distributions.

    Each row has its own maximum subtracted before exponentiation. The
    shift cancels in the ratio, so the result is mathematically unchanged,
    but the largest exponent in every row becomes exactly ``exp(0) == 1``.
    A row can therefore never underflow to all zeros and produce ``0 / 0``,
    which a single global maximum does not guarantee.

    :param y_linear: 2-D nd.array of shape (batch, num_classes)
    :return: nd.array of the same shape; every row is non-negative and sums to 1
    """
    # Per-row shift for numerical stability.
    row_max = nd.max(y_linear, axis=1, keepdims=True)
    exp = nd.exp(y_linear - row_max)
    # Denominator: sum of the exponentials within each row.
    norms = nd.sum(exp, axis=1, keepdims=True)
    return exp / norms

In [46]:
# Small random batch (2 rows, 10 columns) used to sanity-check softmax.
sample_y_linear = nd.random_normal(shape=(2, 10))
sample_y_linear


[[-0.25217363 -0.04621419 -0.16788334 -0.92540014  0.44093993  1.60368168
   0.04739976 -0.50890791 -0.27071321  1.76534593]
 [-0.41492257 -1.96021819 -0.56510949  0.17831698  0.47612235 -1.10977781
   0.85774934  2.10870147 -0.67550689  1.49063873]]
<NDArray 2x10 @cpu(0)>

In [47]:
# Run the sample batch through softmax and show the result.
sample_yhat = softmax(sample_y_linear)
print(sample_yhat)


[[ 0.0437676   0.05377741  0.04761673  0.02232412  0.08753227  0.27998871
   0.05905489  0.03385746  0.04296366  0.32911706]
 [ 0.03293988  0.00702439  0.02834632  0.05961597  0.08029673  0.01644183
   0.11760787  0.4108828   0.02538351  0.22146073]]
<NDArray 2x10 @cpu(0)>


In [48]:
# Every row of the softmax output should sum to (approximately) 1.
print(nd.sum(sample_yhat, axis=1))


[ 0.99999988  1.        ]
<NDArray 2 @cpu(0)>


In [49]:
def net(X):
    """
    Forward pass of the softmax-regression model.

    :param X: batch of flattened inputs, one sample per row
    :return: softmax of the affine scores ``dot(X, W) + b``
    """
    scores = nd.dot(X, W) + b
    return softmax(scores)

In [50]:
def cross_entropy(yhat, y, eps=1e-6):
    """
    Cross-entropy between predicted probabilities and one-hot targets.

    The sum runs over every axis except the first, so the result holds one
    loss value per sample.

    :param yhat: predicted probabilities, one row per sample
    :param y: one-hot encoded targets with the same shape as ``yhat``
    :param eps: small constant added inside the logarithm. A probability that
        underflowed to exactly 0 would otherwise give ``log(0) = -inf`` and
        ``0 * -inf = NaN``, corrupting the loss and every gradient.
    :return: nd.array of per-sample losses
    """
    return - nd.sum(y * nd.log(yhat + eps), axis=0, exclude=True)

In [51]:
def SGD(params, lr):
    """
    Apply one step of plain stochastic gradient descent, in place.

    :param params: iterable of arrays, each exposing its gradient as ``.grad``
    :param lr: learning rate (step size)
    """
    for weight in params:
        step = lr * weight.grad
        # Slice assignment writes into the existing array instead of rebinding the name.
        weight[:] = weight - step

In [52]:
def evaluate_accuracy(data_iterator, net):
    """
    Compute the classification accuracy of ``net`` over a data iterator.

    :param data_iterator: iterable yielding ``(data, label)`` batches
    :param net: callable mapping a (batch, 784) array to per-class scores
    :return: fraction of samples whose arg-max prediction equals the label,
        as a scalar
    :raises ZeroDivisionError: if ``data_iterator`` yields no batches
    """
    numerator = 0.
    denominator = 0.
    for data, label in data_iterator:
        # Flatten every image to a 784-vector on the shared context.
        data = data.as_in_context(ctx).reshape((-1,784))
        label = label.as_in_context(ctx)
        # Compare the arg-max class with the integer label directly.
        predictions = nd.argmax(net(data), axis=1)
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()

In [55]:

# Baseline accuracy before any training step has been taken.
evaluate_accuracy(test_data, net)

0.078500003

In [33]:
# Training hyper-parameters.
epochs = 10
moving_loss = 0.
learning_rate = .001
# Weight of the newest batch in the exponential moving average of the loss.
smoothing_constant = .01
# Number of parameter updates performed so far (used for bias correction).
niter=0

for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        # Flatten every image to a 784-vector and one-hot encode the labels.
        data = data.as_in_context(ctx).reshape((-1,784))
        label = label.as_in_context(ctx)
        label_one_hot = nd.one_hot(label, 10)
        # Record the forward pass so that backward() can differentiate it.
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label_one_hot)
        # `loss` holds one value per sample.
        # NOTE(review): backward() on a non-scalar presumably sums the per-sample
        # gradients rather than averaging them, so the effective step scales with
        # the batch size — confirm against the autograd docs.
        loss.backward()
        SGD(params, learning_rate)

        ##########################
        #  Keep a moving average of the losses
        ##########################
        niter +=1
        moving_loss = (1 - smoothing_constant) * moving_loss + (smoothing_constant) * nd.mean(loss).asscalar()
        # Bias correction: moving_loss starts at 0, so divide by 1 - (1 - c)^niter.
        est_loss = moving_loss/(1-(1-smoothing_constant)**niter)

    # Report accuracy on both splits after every epoch.
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, est_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 2.00963916756, Train_acc 0.67255, Test_acc 0.6664


Epoch 1. Loss: 1.40902795835, Train_acc 0.713817, Test_acc 0.7067


Epoch 2. Loss: 1.19575714031, Train_acc 0.744633, Test_acc 0.7386


Epoch 3. Loss: 1.06863046523, Train_acc 0.75865, Test_acc 0.7537


Epoch 4. Loss: 0.983636726904, Train_acc 0.768517, Test_acc 0.7651


Epoch 5. Loss: 0.935403204531, Train_acc 0.780917, Test_acc 0.7772


Epoch 6. Loss: 0.8767110251, Train_acc 0.786683, Test_acc 0.7826


Epoch 7. Loss: 0.847657711157, Train_acc 0.7803, Test_acc 0.7803


Epoch 8. Loss: 0.815497110766, Train_acc 0.788783, Test_acc 0.7844


Epoch 9. Loss: 0.790368247338, Train_acc 0.798083, Test_acc 0.7969
