In [1]:
# 丢弃法（Dropout）— 从0开始
# http://zh.gluon.ai/chapter_supervised-learning/dropout-scratch.html

from mxnet import nd

def dropout(X, drop_probability):
    """Apply inverted dropout to ``X``.

    Each element is kept when a uniform sample drawn for it is below
    ``1 - drop_probability``; kept elements are multiplied by
    ``1 / (1 - drop_probability)`` so that ``E[dropout(X)] == X``.

    Args:
        X: Array exposing ``shape``, ``context`` and ``zeros_like()``
            (an MXNet NDArray in this notebook).
        drop_probability: Probability of zeroing an element. The keep
            probability ``1 - drop_probability`` must lie in ``[0, 1]``.

    Returns:
        ``X.zeros_like()`` when the keep probability is 0; otherwise the
        masked and rescaled product ``mask * X * (1 / keep_probability)``.

    Raises:
        AssertionError: If the keep probability falls outside ``[0, 1]``.
    """
    keep_prob = 1 - drop_probability
    assert 0 <= keep_prob <= 1

    if keep_prob != 0:
        # Draw the samples on X's own context so the mask lives on the same
        # device as the data it multiplies.
        keep_mask = nd.random.uniform(
            low=0, high=1.0, shape=X.shape, ctx=X.context) < keep_prob
        # Rescale the survivors to keep the expected value equal to X.
        return keep_mask * X * (1 / keep_prob)

    # Keep probability 0: every element is dropped, and 1 / keep_prob would
    # divide by zero, so short-circuit with an all-zero array.
    return X.zeros_like()

  from ._conv import register_converters as _register_converters


In [2]:
# Sanity check: with drop probability 0 the keep probability is 1, so the
# 5x4 matrix holding the values 0..19 should come back unchanged.
A = nd.arange(20).reshape((5, 4))
dropout(A, drop_probability=0.0)


[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]
 [12. 13. 14. 15.]
 [16. 17. 18. 19.]]
<NDArray 5x4 @cpu(0)>

In [3]:
# Keep probability 0.5: surviving entries are scaled by 1 / 0.5 = 2. The mask
# is random (no seed is set in the visible cells), so the output varies per run.
dropout(A, drop_probability=0.5)


[[ 0.  0.  0.  6.]
 [ 0. 10.  0.  0.]
 [16. 18. 20.  0.]
 [24. 26.  0.  0.]
 [ 0. 34.  0.  0.]]
<NDArray 5x4 @cpu(0)>

In [4]:
# Drop probability 1.0 gives keep probability 0, so dropout returns
# A.zeros_like(): every entry is dropped.
dropout(A, drop_probability=1.0)


[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
<NDArray 5x4 @cpu(0)>

In [5]:
# 数据获取
import sys
sys.path.append('..')
import utils
from mxnet import gluon

# Mini-batch size handed to both DataLoaders; the training cell also divides
# the learning rate by it when calling utils.SGD.
batch_size = 256
def transform(data, label):
    """Cast a sample to float32 and divide the data by 255.

    Args:
        data: Array with an ``astype`` method (presumably uint8 pixel
            values in 0..255 -- verify against the dataset).
        label: Array with an ``astype`` method.

    Returns:
        Tuple ``(data, label)`` where ``data`` is float32 divided by 255
        and ``label`` is cast to float32.
    """
    scaled = data.astype('float32') / 255
    label_f32 = label.astype('float32')
    return scaled, label_f32
# Fashion-MNIST train and test splits; `transform` is passed as the dataset's
# transform callback.
mnist_train, mnist_test = (
    gluon.data.vision.FashionMNIST(train=is_train, transform=transform)
    for is_train in (True, False)
)
# Only the training loader shuffles; the test loader keeps dataset order.
train_data = gluon.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
  data = np.fromstring(fin.read(), dtype=np.uint8)


In [6]:
# Layer sizes: flattened 28x28 input -> two hidden layers -> 10 outputs.
num_inputs = 28 * 28
num_hidden1 = 256
num_hidden2 = 256
num_outputs = 10

# `scale` argument passed to nd.random_normal for every weight matrix.
weight_scale = 0.01

# Weight matrices are sampled in the order W1, W2, W3.
W1, W2, W3 = (
    nd.random_normal(shape=dims, scale=weight_scale)
    for dims in (
        (num_inputs, num_hidden1),
        (num_hidden1, num_hidden2),
        (num_hidden2, num_outputs),
    )
)
# Biases start at zero, one entry per output unit of the layer.
b1, b2, b3 = (
    nd.zeros(width) for width in (num_hidden1, num_hidden2, num_outputs)
)

params = [W1, b1, W2, b2, W3, b3]

# Attach a gradient buffer to each parameter so backward() can fill it in.
for param in params:
    param.attach_grad()

In [10]:
# Define the model with dropout layers.
drop_prob1 = 0.2  # drop probability applied after the first hidden layer
drop_prob2 = 0.5  # drop probability applied after the second hidden layer

def net(X, is_training=False):
    """Forward pass of the MLP with two hidden ReLU layers.

    Args:
        X: Input batch; reshaped to ``(-1, num_inputs)`` before the first
            layer.
        is_training: When true, dropout is applied after each hidden layer
            (``drop_prob1`` after the first, ``drop_prob2`` after the
            second). When false, both dropout steps are skipped.

    Returns:
        The output of the last fully connected layer,
        ``dot(h2, W3) + b3``; no softmax is applied here.
    """
    flat = X.reshape((-1, num_inputs))

    # First fully connected layer, optionally followed by dropout.
    hidden1 = nd.relu(nd.dot(flat, W1) + b1)
    hidden1 = dropout(hidden1, drop_prob1) if is_training else hidden1

    # Second fully connected layer, optionally followed by dropout.
    hidden2 = nd.relu(nd.dot(hidden1, W2) + b2)
    hidden2 = dropout(hidden2, drop_prob2) if is_training else hidden2

    # Output layer.
    return nd.dot(hidden2, W3) + b3

In [11]:
# 训练
from mxnet import autograd
from mxnet import gluon

# Loss applied to the raw outputs of net() and the labels.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Base step size; it is divided by batch_size when passed to utils.SGD below.
learning_rate = .5

# Train for 5 epochs, printing averaged metrics after each one.
for epoch in range(5):
    # Running sums of per-batch mean loss and per-batch accuracy.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Record the forward pass (with dropout enabled) for autograd.
        with autograd.record():
            output = net(data, is_training=True)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # utils.SGD is defined outside this notebook; presumably it applies an
        # in-place SGD step to every entry of params -- verify in utils.
        utils.SGD(params, learning_rate / batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    # NOTE(review): evaluate_accuracy receives `net` itself; if it calls
    # net(data) without is_training, dropout is skipped here -- confirm in utils.
    test_acc = utils.evaluate_accuracy(test_data, net)
    # len(train_data) is the number of batches, so these are per-batch averages.
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

Epoch 0. Loss: 0.398257, Train acc 0.854482, Test acc 0.870605
Epoch 1. Loss: 0.383243, Train acc 0.860799, Test acc 0.873437
Epoch 2. Loss: 0.372968, Train acc 0.863630, Test acc 0.878516
Epoch 3. Loss: 0.364133, Train acc 0.865564, Test acc 0.881445
Epoch 4. Loss: 0.346770, Train acc 0.872961, Test acc 0.885645
