# This implementation is slow because GPU is not used

In [1]:
import numpy as np
import mnist # pip install mnist
import utils
import operations as op

# Configure

In [2]:
# Note:
np.random.seed(0)
batch_size = 100
total_iteration = 500

lr = 0.1 # learning rate
weight_decay = 0.0001
optimizer = 'gradient_descent'  # optional: momentum

# Define CNN model

In [3]:
# Note:
# Define CNN model modified from LeNet5
# os.FC : fully connected layer 

# Usage
# op.Convolution(kernel_size, channel_in, channel_out, stride, use_bias)
# op.Pooling(kernel_size, stride, pooling_type=['average' or 'max'])
# os.FC(node_in, node_out, use_bias)
model_np = [op.Convolution(3, 1, 2, 1, False),
            op.BatchNormalization(),
            op.ReLU(),

            op.Pool(3, 2, 'average'),
            op.ReLU(),

            op.Convolution(3, 2, 4, 1, False),
            op.BatchNormalization(),
            op.ReLU(),

            op.Pool(3, 2, 'average'),
            op.ReLU(),

            op.Convolution(3, 4, 128, 1, False),
            op.BatchNormalization(),
            op.ReLU(),

            op.FC(512, 32, False),
            op.BatchNormalization(),
            op.ReLU(),

            op.FC(32, 10, False)]
softmax_np = op.Softmax()
loss_np = op.CrossEntropy()  # loss
cost_np = op.Cost()

# Train

In [4]:
# prepare train data
train_images = mnist.train_images()
train_labels = mnist.train_labels()

args = {'lr': lr, 'is_train': True, 'optimizer': optimizer, 'weight_decay': weight_decay}
for iteration in range(total_iteration):
    random_indices = np.random.randint(0, len(train_images), batch_size)
    x_np = np.expand_dims(train_images[random_indices, ::] / 255.0, 3).astype(np.float32)
    y_np = utils.one_hot_encode(train_labels[random_indices], 10)
    args['y'] = y_np

    # forward
    out_np = x_np
    l2_loss = 0
    for layer in model_np:
        layer.forward(out_np, args)
        try:
            l2_loss += layer.l2_los
        except AttributeError:
            pass
        out_np = layer.value
    softmax_np.forward(out_np, args)
    loss_np.forward(softmax_np.value, args)
    cost_np.forward(loss_np.value, l2_loss)
    pred_np = np.argmax(model_np[-1].value, 1)
    accuracy_np = np.mean(pred_np == np.argmax(y_np, 1))
    
    # backward
    cost_np.backward()
    loss_np.backward(cost_np.grads)
    softmax_np.backward(loss_np.grads)
    grads_np = softmax_np.grads
    for layer in model_np[::-1]:
        layer.backward(grads_np, args)
        grads_np = layer.grads
    if (iteration+1) % 50 == 0:
        print("iter:%04d | cost:%.3f,  accuracy:%.2f" % (iteration+1, cost_np.value, accuracy_np))

iter:0050 | cost:0.285,  accuracy:0.95
iter:0100 | cost:0.256,  accuracy:0.94
iter:0150 | cost:0.199,  accuracy:0.93
iter:0200 | cost:0.188,  accuracy:0.95
iter:0250 | cost:0.132,  accuracy:0.97
iter:0300 | cost:0.172,  accuracy:0.94
iter:0350 | cost:0.094,  accuracy:0.98
iter:0400 | cost:0.085,  accuracy:0.97
iter:0450 | cost:0.141,  accuracy:0.96
iter:0500 | cost:0.086,  accuracy:0.99


# Test

In [5]:
# prepare test data
test_images = mnist.test_images()
test_labels = mnist.test_labels()
correctness = 0.0
args['is_train'] = False # to use moving mean and variance
for i, (test_img, test_label) in enumerate(zip(test_images, test_labels)):
    x_np = (test_img.reshape([1, 28, 28, 1]) / 255.0).astype(np.float32)
    y_np = utils.one_hot_encode(test_label.reshape(1, 1), 10)
    # forward
    out_np = x_np
    for layer in model_np:
        layer.forward(out_np, args)
        out_np = layer.value
    pred_np = np.argmax(out_np, 1)
    correctness += np.sum(pred_np == test_label)
    if (i+1) % 1000 == 0:
        print("Testing (%05d/%05d)"%(i+1, len(test_images)))
print("accuracy: %.3f" % (correctness / len(test_images)))

Testing (01000/10000)
Testing (02000/10000)
Testing (03000/10000)
Testing (04000/10000)
Testing (05000/10000)
Testing (06000/10000)
Testing (07000/10000)
Testing (08000/10000)
Testing (09000/10000)
Testing (10000/10000)
accuracy: 0.966
