In [1]:
import sys
sys.path.insert(0, '..')

import d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

def dropout(X, drop_prob):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``drop_prob``; the
    surviving elements are divided by ``1 - drop_prob`` so that the expected
    value of every element is unchanged.

    Args:
        X: NDArray to apply dropout to.
        drop_prob: Probability of dropping an element, in ``[0, 1]``.

    Returns:
        An array shaped like ``X``: all zeros when ``drop_prob == 1``,
        otherwise the masked and rescaled input.

    Raises:
        AssertionError: If ``drop_prob`` is outside ``[0, 1]``.
    """
    assert 0 <= drop_prob <= 1
    # In this case, all elements are dropped out (and dividing by
    # 1 - drop_prob below would be a division by zero).
    if drop_prob == 1:
        return X.zeros_like()
    # Sample the mask on the same device as X so the product below does not
    # mix contexts when X lives on a GPU.
    # Use `>=` rather than `>`: the samples come from the half-open interval
    # [0, 1), so a draw of exactly 0 must still be kept when drop_prob == 0.
    mask = nd.random.uniform(0, 1, X.shape, ctx=X.context) >= drop_prob
    return mask * X / (1.0 - drop_prob)

In [9]:
# Try dropout on a small 2x8 array holding 0..15 with drop probabilities
# 0, 0.5 and 1.
X = nd.arange(16).reshape((2, 8))
print(dropout(X, 0), dropout(X, 0.5), dropout(X, 1), sep="\n")


[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]]
<NDArray 2x8 @cpu(0)>

[[ 0.  0.  0.  0.  8. 10. 12.  0.]
 [16.  0. 20. 22.  0.  0.  0. 30.]]
<NDArray 2x8 @cpu(0)>

[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 2x8 @cpu(0)>


In [19]:
# Network dimensions: input width, output width and the two hidden widths.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256

# One (weight, bias) pair per fully connected layer. Weights are sampled from
# a normal distribution with scale 0.01; biases start at zero.
W1 = nd.random.normal(shape=(num_inputs, num_hiddens1), scale=0.01)
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(shape=(num_hiddens1, num_hiddens2), scale=0.01)
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(shape=(num_hiddens2, num_outputs), scale=0.01)
b3 = nd.zeros(num_outputs)

# Allocate gradient buffers so autograd can record gradients for every
# parameter.
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

In [20]:
# Drop probabilities applied after the first and second hidden layers.
drop_prob1 = 0.2
drop_prob2 = 0.5

def net(X):
    """Forward pass of the two-hidden-layer MLP with dropout.

    The input is flattened to ``(-1, num_inputs)`` and passed through two
    ReLU-activated fully connected layers and a final linear layer. Dropout
    is applied after each hidden layer only while autograd reports training
    mode.

    Args:
        X: Input batch; reshaped to ``(-1, num_inputs)``.

    Returns:
        The output of the last fully connected layer.
    """
    # Dropout is only active when training the model.
    training = autograd.is_training()

    flat = X.reshape((-1, num_inputs))

    # First fully connected layer, followed by dropout during training.
    hidden1 = (nd.dot(flat, W1) + b1).relu()
    if training:
        hidden1 = dropout(hidden1, drop_prob1)

    # Second fully connected layer, followed by dropout during training.
    hidden2 = (nd.dot(hidden1, W2) + b2).relu()
    if training:
        hidden2 = dropout(hidden2, drop_prob2)

    return nd.dot(hidden2, W3) + b3

In [22]:
# Training hyperparameters.
num_epochs = 10
lr = 0.5
batch_size = 256

# Softmax cross-entropy loss and the Fashion-MNIST data iterators.
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Train the from-scratch model, passing its raw parameter list and learning
# rate to the training helper.
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.3358, train acc 0.876, test acc 0.874
epoch 2, loss 0.3298, train acc 0.880, test acc 0.885
epoch 3, loss 0.3245, train acc 0.882, test acc 0.879
epoch 4, loss 0.3143, train acc 0.884, test acc 0.888
epoch 5, loss 0.3093, train acc 0.886, test acc 0.883
epoch 6, loss 0.3067, train acc 0.887, test acc 0.886
epoch 7, loss 0.2978, train acc 0.890, test acc 0.886
epoch 8, loss 0.2916, train acc 0.891, test acc 0.891
epoch 9, loss 0.2894, train acc 0.892, test acc 0.887
epoch 10, loss 0.2852, train acc 0.894, test acc 0.891


In [25]:
# Concise implementation using Gluon layers.
net = nn.Sequential()
net.add(nn.Dense(256, activation="relu"))
# Dropout layer after the first fully connected layer
net.add(nn.Dropout(drop_prob1))
net.add(nn.Dense(256, activation="relu"))
# Dropout layer after the second fully connected layer
net.add(nn.Dropout(drop_prob2))
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

# Plain SGD over all of the network's parameters; the trainer is handed to
# the training helper in place of a raw parameter list and learning rate.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch3(
    net, train_iter, test_iter, loss, 20, batch_size, None, None, trainer)

epoch 1, loss 1.1262, train acc 0.561, test acc 0.746
epoch 2, loss 0.5768, train acc 0.787, test acc 0.833
epoch 3, loss 0.4928, train acc 0.821, test acc 0.851
epoch 4, loss 0.4455, train acc 0.837, test acc 0.860
epoch 5, loss 0.4155, train acc 0.849, test acc 0.859
epoch 6, loss 0.3943, train acc 0.857, test acc 0.868
epoch 7, loss 0.3814, train acc 0.862, test acc 0.872
epoch 8, loss 0.3649, train acc 0.866, test acc 0.875
epoch 9, loss 0.3560, train acc 0.869, test acc 0.877
epoch 10, loss 0.3417, train acc 0.874, test acc 0.869
epoch 11, loss 0.3345, train acc 0.877, test acc 0.877
epoch 12, loss 0.3267, train acc 0.880, test acc 0.877
epoch 13, loss 0.3209, train acc 0.883, test acc 0.888
epoch 14, loss 0.3124, train acc 0.885, test acc 0.884
epoch 15, loss 0.3078, train acc 0.887, test acc 0.885
epoch 16, loss 0.3055, train acc 0.888, test acc 0.889
epoch 17, loss 0.2952, train acc 0.891, test acc 0.887
epoch 18, loss 0.2925, train acc 0.893, test acc 0.884
epoch 19, loss 0.28