# 一. 手工版本

1.1导入数据：

In [None]:
import sys
# Add the parent directory to the module search path before importing
# `utils` (presumably that is where the helper module lives).
sys.path.append('..')
import utils

# Mini-batch size handed to the data-loading helper.
batch_size = 256
# The helper returns a (train, test) pair; both are iterated as
# (data, label) batches later in this notebook.
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

1.2初始化参数：

In [None]:
from mxnet import ndarray as nd
# Each input is flattened to 28*28 values; the model emits 10 scores.
num_inputs = 28 * 28
num_outputs = 10

# Width of the hidden layer and the standard deviation used to draw the
# initial weights.
num_hidden = 256
weight_scale = .01

# Define a model with a single hidden layer that has 256 units.

# Weights are sampled through the `nd.random` namespace; the flat
# `nd.random_normal` alias is the legacy spelling of the same sampler.
W1 = nd.random.normal(shape=(num_inputs, num_hidden), scale=weight_scale)
b1 = nd.zeros(num_hidden)

W2 = nd.random.normal(shape=(num_hidden, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2]

# Allocate a gradient buffer for every parameter so autograd can fill
# `param.grad` during the backward pass.
for param in params:
    param.attach_grad()

1.3激活函数

In [None]:
def relu(x):
    """Return the element-wise maximum of `x` and 0 (rectified linear unit)."""
    rectified = nd.maximum(x, 0)
    return rectified

1.4定义模型

In [None]:
def net(X):
    """Forward pass of the one-hidden-layer perceptron.

    `X` is reshaped to rows of `num_inputs` values, sent through the
    hidden affine layer (`W1`, `b1`) followed by `relu`, and then through
    the output affine layer (`W2`, `b2`). The raw output scores are
    returned; no softmax is applied here.
    """
    flat = X.reshape((-1, num_inputs))
    hidden = relu(nd.dot(flat, W1) + b1)
    return nd.dot(hidden, W2) + b2

1.5Softmax和交叉熵损失函数

In [None]:
from mxnet import gluon
# Loss object taken from Gluon; the training loop calls it as
# `softmax_cross_entropy(output, label)` on the raw network outputs.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

1.6训练

In [None]:
from mxnet import autograd as autograd

# Base step size; it is divided by the mini-batch size on every update.
learning_rate = .5

for epoch in range(5):
    # Running sums over the mini-batches of this epoch.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Record the forward pass so gradients can be computed.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Scale the step by the size of THIS mini-batch rather than the
        # nominal `batch_size`: if the last batch of an epoch is smaller,
        # dividing by `batch_size` would under-weight its update.
        utils.SGD(params, learning_rate / data.shape[0])

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # Report per-batch averages for training and the accuracy on test data.
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

# 二. 使用GLUON

2.1定义模型

In [None]:
from mxnet import gluon

# Gluon version of the model: flatten the input, one hidden Dense layer
# of 256 units with ReLU, and a Dense output layer of 10 units.
net = gluon.nn.Sequential()
with net.name_scope():
    # The layers are created inside the name scope, in the same order as
    # they are added to the container.
    for layer in (
        gluon.nn.Flatten(),
        gluon.nn.Dense(256, activation="relu"),
        gluon.nn.Dense(10),
    ):
        net.add(layer)
net.initialize()

2.2读取数据并训练

In [None]:
import sys
sys.path.append('..')
from mxnet import ndarray as nd
from mxnet import autograd
import utils


# Mini-batch size handed to the data-loading helper.
batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

# Loss and optimizer: plain SGD over all parameters of `net`.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})

for epoch in range(5):
    # Running sums over the mini-batches of this epoch.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Record the forward pass so gradients can be computed.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # `Trainer.step` normalises the gradient by the value passed in.
        # Pass the size of THIS mini-batch instead of the nominal
        # `batch_size`, so a smaller final batch is not under-weighted.
        trainer.step(data.shape[0])

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # Report per-batch averages for training and the accuracy on test data.
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))

In [36]:
import numpy as np
# Two identical 2x2 integer matrices.
a = np.array([[1, 2], [3, 4]])
b = np.array([[1, 2], [3, 4]])
# Element-wise square of `a`.
squared = a ** 2
print(squared)
# The scalar sum of the squares is added to every entry of `b`.
print(squared.sum() + b)

[[ 1  4]
 [ 9 16]]
[[31 32]
 [33 34]]


In [43]:
from mxnet import nd

def dropout(X, drop_probability):
    """Randomly zero entries of `X`, exposing every intermediate stage.

    Returns `X.zeros_like()` when `drop_probability` is 1. Otherwise
    returns a 3-tuple: `X` itself, `X` with the dropped entries zeroed,
    and that masked array rescaled by `1 / (1 - drop_probability)`.
    Note that the two cases return different shapes of result.

    Raises AssertionError when `1 - drop_probability` is outside [0, 1].
    """
    keep = 1 - drop_probability
    assert 0 <= keep <= 1
    # In this case every element is dropped.
    if keep == 0:
        return X.zeros_like()

    # Draw one uniform sample per element; an element is kept when its
    # sample falls below the keep probability, otherwise it is dropped.
    samples = nd.random.uniform(0, 1.0, X.shape, ctx=X.context)
    mask = samples < keep
    masked = mask * X
    # Rescale the survivors so that E[dropout(X)] == X.
    scale = 1 / keep
    return X, masked, masked * scale

# Demo input: the values 0..19 arranged as a 5x4 array, passed through
# `dropout` with a drop probability of 0.5.
A = nd.arange(20).reshape((5,4))
dropout(A, 0.5)

(
 [[  0.   1.   2.   3.]
  [  4.   5.   6.   7.]
  [  8.   9.  10.  11.]
  [ 12.  13.  14.  15.]
  [ 16.  17.  18.  19.]]
 <NDArray 5x4 @cpu(0)>, 
 [[  0.   0.   0.   0.]
  [  4.   5.   6.   7.]
  [  0.   9.   0.  11.]
  [ 12.   0.   0.  15.]
  [  0.  17.  18.   0.]]
 <NDArray 5x4 @cpu(0)>, 
 [[  0.   0.   0.   0.]
  [  8.  10.  12.  14.]
  [  0.  18.   0.  22.]
  [ 24.   0.   0.  30.]
  [  0.  34.  36.   0.]]
 <NDArray 5x4 @cpu(0)>)

In [46]:
from mxnet.gluon import nn
class MyDense(nn.Block):
    """Dense layer with explicitly sized parameters and a ReLU output.

    `units` is the output width and `in_units` the input width; any extra
    keyword arguments are forwarded to `nn.Block`.
    """

    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        weight_shape = (in_units, units)
        bias_shape = (units,)
        # Register both parameters inside this block's name scope.
        with self.name_scope():
            self.weight = self.params.get('weight', shape=weight_shape)
            self.bias = self.params.get('bias', shape=bias_shape)

    def forward(self, x):
        """Return relu(dot(x, weight) + bias)."""
        weight = self.weight.data()
        bias = self.bias.data()
        pre_activation = nd.dot(x, weight) + bias
        return nd.relu(pre_activation)

# Layer created without an explicit prefix; print its parameter dict.
dense = MyDense(5, in_units=10)
print (dense.params)
# Same layer sizes, with an explicit `prefix` forwarded through **kwargs;
# the printed parameter names start with that prefix.
den = MyDense(5, in_units=10, prefix='o_my_dense_')
print (den.params)

mydense2_ (
  Parameter mydense2_weight (shape=(10, 5), dtype=<class 'numpy.float32'>)
  Parameter mydense2_bias (shape=(5,), dtype=<class 'numpy.float32'>)
)
o_my_dense_ (
  Parameter o_my_dense_weight (shape=(10, 5), dtype=<class 'numpy.float32'>)
  Parameter o_my_dense_bias (shape=(5,), dtype=<class 'numpy.float32'>)
)
