# 3.2 多类逻辑回归-gluon

In [1]:
"""
1. 获取和读取数据
"""
import sys
sys.path.append('..')
import utils

# Mini-batch size handed to the data loader below.
batch_size = 256
# Load Fashion-MNIST through the project-local helper; the two returned objects are
# iterated by the training cell and passed to utils.evaluate_accuracy respectively.
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

In [2]:
"""
2. 定义和初始化模型
利用Flatten层将输入数据变换成batch_size X ? 的大小，再输入到输出大小为10的全连接层，不要指定中间层的大小，gluon会自动推导
"""
from mxnet import gluon

# Build the model: a Flatten layer followed by a single Dense layer with 10 outputs.
net = gluon.nn.Sequential()
# Layers are created inside the container's name scope.
with net.name_scope():
    net.add(gluon.nn.Flatten())
    # Only the output size (10) is given; the input size is left for gluon to infer.
    net.add(gluon.nn.Dense(10))
# No initializer is passed, so the library default is used.
net.initialize()

In [3]:
"""
3. softmax和交叉熵损失
在softmax regression 从0开始一节中，我们增大学习率时，会导致反向求导过程中数据溢出，
主要原因是分开定义Softmax和交叉熵这样会有数值不稳定性，在分开计算过程中增加计算次数，在浮点数运算过程中，会有累积误差产生。
因此gluon提供一个将这两个函数合起来的数值更稳定的版本
"""
# Combined softmax + cross-entropy loss, constructed with its default arguments.
# The training cell calls it as softmax_cross_entropy(output, label).
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [None]:
"""
softmax 与 cross entropy 分开与结合版
"""
# Illustrative pseudo-code only (Julia-like syntax, entirely commented out, never executed).
# It contrasts two ways of computing the gradient of cross-entropy(softmax(z)) w.r.t. z.
#
# softmax: exponentiate and normalize. The max-subtraction line is itself commented out,
# so this version does not shift z before exp().
# function softmax(z)
#   #z = z - maximum(z)
#   o = exp(z)
#   return o / sum(o)
# end
# Combined form: take softmax(z) and subtract 1 at the label index y. No division occurs.
# function gradient_together(z, y)
#   o = softmax(z)
#   o[y] -= 1.0
#   return o
# end
# Separated form: multiply the softmax Jacobian (diagm(o) - o*o') by the loss gradient
# ∂f/∂o, whose only non-zero entry is -1.0 / o[y] — i.e. it divides by the predicted
# probability of the label.
# function gradient_separated(z, y)
#   o = softmax(z)
#   ∂o_∂z = diagm(o) - o*o'
#   ∂f_∂o = zeros(size(o))
#   ∂f_∂o[y] = -1.0 / o[y]
#   return ∂o_∂z * ∂f_∂o
# end

In [6]:
# Interactive lookup of the loss object's documentation; no later cell uses its result.
# NOTE(review): this cell's execution count is out of sequence with its position in the
# notebook — consider removing it or re-running the notebook top to bottom.
help(softmax_cross_entropy)

Help on SoftmaxCrossEntropyLoss in module mxnet.gluon.loss object:

class SoftmaxCrossEntropyLoss(Loss)
 |  Computes the softmax cross entropy loss. (alias: SoftmaxCELoss)
 |  
 |  If `sparse_label` is `True` (default), label should contain integer
 |  category indicators:
 |  
 |  .. math::
 |  
 |      \DeclareMathOperator{softmax}{softmax}
 |  
 |      p = \softmax({pred})
 |  
 |      L = -\sum_i \log p_{i,{label}_i}
 |  
 |  `label`'s shape should be `pred`'s shape with the `axis` dimension removed.
 |  i.e. for `pred` with shape (1,2,3,4) and `axis = 2`, `label`'s shape should
 |  be (1,2,4).
 |  
 |  If `sparse_label` is `False`, `label` should contain probability distribution
 |  and `label`'s shape should be the same with `pred`:
 |  
 |  .. math::
 |  
 |      p = \softmax({pred})
 |  
 |      L = -\sum_i \sum_j {label}_j \log p_{ij}
 |  
 |  Parameters
 |  ----------
 |  axis : int, default -1
 |      The axis to sum over when computing softmax and entropy.
 |  sparse_label 

In [4]:
"""
4. 优化
"""
# SGD trainer over every parameter collected from `net`.
# NOTE(review): a learning rate of 10.0 is unusually large — presumably chosen on purpose
# to stress the numerically stable combined loss; confirm before reusing this value.
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': 10.0},
)

In [5]:
"""
5. 训练
"""
import mxnet.ndarray as nd
from mxnet import autograd

# Number of full passes over the training data.
num_epochs = 5

# Train the model: for each mini-batch run a recorded forward pass, back-propagate the
# per-sample losses, and apply one optimizer step. After each epoch, report the running
# training loss/accuracy (averaged over batches) and the accuracy on the test data.
for epoch in range(num_epochs):
    train_acc = 0.
    train_loss = 0.
    for data, label in train_data:
        # Record the forward computation so gradients can be derived from it.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Normalize the gradient by the number of samples actually in this batch.
        # The last batch of an epoch can be smaller than `batch_size`; dividing by the
        # nominal size would silently shrink that batch's update.
        trainer.step(data.shape[0])

        train_acc += utils.accuracy(output, label)
        train_loss += nd.mean(loss).asscalar()

    test_acc = utils.evaluate_accuracy(test_data, net)
    # len(train_data) is used as the number of batches accumulated above.
    print('Epoch %d. Loss:%f, Train accuracy: %f, Test accuracy: %f.'%(epoch, train_loss/len(train_data),
                                                                      train_acc/len(train_data), test_acc))

Epoch 0. Loss:36.672965, Train accuracy: 0.684395, Test accuracy: 0.659255.
Epoch 1. Loss:20.201806, Train accuracy: 0.755142, Test accuracy: 0.786859.
Epoch 2. Loss:17.588913, Train accuracy: 0.771267, Test accuracy: 0.782953.
Epoch 3. Loss:16.779442, Train accuracy: 0.778112, Test accuracy: 0.744491.
Epoch 4. Loss:16.465251, Train accuracy: 0.781267, Test accuracy: 0.701623.
