In [1]:
import numpy as np
import mnist # pip install mnist
import utils
import operations as op

# Configure

In [2]:
# Note:
# increase total_iteration to get a higher accuracy
np.random.seed(0)
batch_size = 100
total_iteration = 1000

lr = 0.1 # learning rate
weight_decay = 0.0
optimizer = 'gradient_descent' # optional: momentum

# Define MLP model

In [3]:
# Note:
# os.FC : fully connected layer

# Usage:
# os.FC(node_in, node_out, use_bias)


model_np = [op.FC(784, 32, False),
            op.BatchNormalization(),
            op.ReLU(),
            op.FC(32, 10, False),
            op.Softmax()]
softmax_np = op.Softmax()
loss_np = op.CrossEntropy() # loss
cost_np = op.Cost() # average of losses over batch

# Train

In [4]:
# prepare train data
train_images = mnist.train_images()
train_labels = mnist.train_labels()

args = {'lr': lr, 'is_train': True, 'optimizer': optimizer, 'weight_decay': weight_decay}
for iteration in range(total_iteration):
    random_indices = np.random.randint(0, len(train_images), batch_size)
    x_np = train_images[random_indices, ::].reshape(batch_size, 784) / 255.0
    y_np = utils.one_hot_encode(train_labels[random_indices], 10)
    args['y'] = y_np

    # forward
    out_np = x_np
    l2_loss = 0
    for layer in model_np:
        layer.forward(out_np, args)
        try:
            l2_loss += layer.l2_los
        except AttributeError:
            pass
        out_np = layer.value
    softmax_np.forward(out_np, args)
    loss_np.forward(softmax_np.value, args)
    cost_np.forward(loss_np.value, l2_loss)
    pred_np = np.argmax(model_np[-1].value, 1)
    accuracy_np = np.mean(pred_np == np.argmax(y_np, 1))
    
    # backward
    cost_np.backward()
    loss_np.backward(cost_np.grads)
    softmax_np.backward(loss_np.grads)
    grads_np = softmax_np.grads
    for layer in model_np[::-1]:
        layer.backward(grads_np, args)
        grads_np = layer.grads
    if (iteration+1) % 100 == 0:
        print("iter:%04d | cost:%.3f,  accuracy:%.2f" % (iteration+1, cost_np.value, accuracy_np))

iter:0100 | cost:1.889,  accuracy:0.69
iter:0200 | cost:1.753,  accuracy:0.83
iter:0300 | cost:1.709,  accuracy:0.85
iter:0400 | cost:1.686,  accuracy:0.88
iter:0500 | cost:1.647,  accuracy:0.86
iter:0600 | cost:1.617,  accuracy:0.90
iter:0700 | cost:1.634,  accuracy:0.87
iter:0800 | cost:1.582,  accuracy:0.92
iter:0900 | cost:1.600,  accuracy:0.88
iter:1000 | cost:1.605,  accuracy:0.87


# Test

In [5]:
# prepare test data
test_images = mnist.test_images()
test_labels = mnist.test_labels()
correctness = 0.0
args['is_train'] = False # to use moving mean and variance
for i, (test_img, test_label) in enumerate(zip(test_images, test_labels)):
    x_np = (test_img.reshape([1, 784]) / 255.0).astype(np.float32)
    y_np = utils.one_hot_encode(test_label.reshape(1, 1), 10)
    # forward
    out_np = x_np
    for layer in model_np:
        layer.forward(out_np, args)
        out_np = layer.value
    pred_np = np.argmax(out_np, 1)
    correctness += np.sum(pred_np == test_label)
    if (i+1) % 1000 == 0:
        print("Testing (%05d/%05d)"%(i+1, len(test_images)))
print("accuracy: %.3f" % (correctness / len(test_images)))

Testing (01000/10000)
Testing (02000/10000)
Testing (03000/10000)
Testing (04000/10000)
Testing (05000/10000)
Testing (06000/10000)
Testing (07000/10000)
Testing (08000/10000)
Testing (09000/10000)
Testing (10000/10000)
accuracy: 0.920
