In [1]:
from mxnet import nd, gluon
from mxnet import autograd as ag
import mxnet as mx
import utils

In [2]:
batch_size = 256
epochs = 50
weight_scale = 0.01
learning_rate = 0.2
ctx = utils.try_gpu()

In [3]:
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

W1 = nd.random_normal(shape=(20, 1, 5, 5), scale=weight_scale, ctx=ctx)
b1 = nd.zeros(W1.shape[0], ctx=ctx)

W2 = nd.random_normal(shape=(50, 20, 3, 3), scale=weight_scale, ctx=ctx)
b2 = nd.zeros(W2.shape[0], ctx=ctx)

W3 = nd.random_normal(shape=(1250, 128), scale=weight_scale, ctx=ctx)
b3 = nd.zeros(W3.shape[1], ctx=ctx)

W4 = nd.random_normal(shape=(128, 10), scale=weight_scale, ctx=ctx)
b4 = nd.zeros(shape=W4.shape[1], ctx=ctx)

paramas = [W1,b1,W2,b2,W3,b3,W4,b4]

for param in paramas:
    param.attach_grad()




In [4]:
def net(X, verbose=False):
#     X = X.as_in_context(ctx)
    print('X shape:', X.shape) if verbose else None
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=W1.shape[2:],num_filter=W1.shape[0])
    print('h1 convolution shape:', h1_conv.shape) if verbose else None
    h1_relu = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_relu, pool_type='max', kernel=(2, 2), stride=(2,2) )
    print('h1 polling shape:', h1.shape) if verbose else None
    
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=W2.shape[2:], num_filter=W2.shape[0])
    print('h2 convolution shape:', h2_conv.shape) if verbose else None
    h2_relu = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_relu, pool_type='max', kernel=(2, 2), stride=(2, 2))
    print('h2 polling shape:', h2_relu.shape) if verbose else None
    h2 = h2.flatten()
    print('h2 flatten shape:', h2.shape) if verbose else None
    
    
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    print('h3 shape:', h3.shape) if verbose else None
    
    h4_linear = nd.dot(h3, W4) + b4
    print('h4  shape:', h4_linear.shape) if verbose else None
    
    return h4_linear


In [5]:
for X, _ in train_data:
    print(net(X, verbose=True))
    break

X shape: (256, 1, 28, 28)
h1 convolution shape: (256, 20, 24, 24)
h1 polling shape: (256, 20, 12, 12)
h2 convolution shape: (256, 50, 10, 10)
h2 polling shape: (256, 50, 10, 10)
h2 flatten shape: (256, 1250)
h3 shape: (256, 128)
h4  shape: (256, 10)

[[  9.00049599e-06  -6.47684237e-06  -7.64564320e-05 ...,  -5.99803170e-05
   -3.85270323e-05   2.01009007e-05]
 [ -3.44139553e-05  -3.96617907e-06  -8.17592081e-05 ...,   1.81698288e-05
   -7.72076601e-05   2.07855373e-05]
 [ -2.88460888e-05   1.78134687e-05  -8.40852008e-05 ...,  -4.27798586e-05
   -6.68772263e-05   1.48699255e-05]
 ..., 
 [ -2.75682578e-05   7.16838349e-06  -9.73502974e-05 ...,  -5.10061618e-05
   -3.83101105e-05   2.92994196e-06]
 [ -8.35243191e-05   2.21156170e-05  -8.32318474e-05 ...,  -9.82839765e-07
   -9.14775592e-05   2.03187465e-05]
 [ -6.66341439e-05   4.32687739e-06  -6.81044985e-05 ...,  -3.81039354e-05
   -7.40362884e-05   1.87062869e-05]]
<NDArray 256x10 @cpu(0)>


In [None]:
loss_SF_CE = gluon.loss.SoftmaxCrossEntropyLoss()

for e in range(epochs):
    train_loss = 0
    train_acc = 0
    for X, y in train_data:
        X = X.as_in_context(ctx)
        y = y.as_in_context(ctx)
        with ag.record():
            output = net(X)
            loss = loss_SF_CE(output, y)
        loss.backward()
        utils.SGD(paramas, learning_rate/ batch_size)
        
        train_loss += loss.mean().asscalar()
        train_acc += utils.accuracy(output, y)
    test_acc = utils.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
    e, train_loss/len(train_data),
    train_acc/len(train_data), test_acc))
        

Epoch 0. Loss: 2.302227, Train acc 0.107873, Test acc 0.286859
Epoch 1. Loss: 1.475448, Train acc 0.446748, Test acc 0.652444
Epoch 2. Loss: 0.638800, Train acc 0.749666, Test acc 0.778245
Epoch 3. Loss: 0.509663, Train acc 0.803786, Test acc 0.835537
Epoch 4. Loss: 0.452598, Train acc 0.831130, Test acc 0.833333
Epoch 5. Loss: 0.412683, Train acc 0.850361, Test acc 0.848157
Epoch 6. Loss: 0.378172, Train acc 0.860243, Test acc 0.869291
Epoch 7. Loss: 0.353821, Train acc 0.870793, Test acc 0.875501
Epoch 8. Loss: 0.338102, Train acc 0.876536, Test acc 0.867989
Epoch 9. Loss: 0.326730, Train acc 0.880292, Test acc 0.879908
Epoch 10. Loss: 0.311558, Train acc 0.885116, Test acc 0.872696
Epoch 11. Loss: 0.303004, Train acc 0.887987, Test acc 0.865986
Epoch 12. Loss: 0.296622, Train acc 0.889573, Test acc 0.884615
