# High-level CNTK MNIST Example

In [1]:
import numpy as np
import os
import cntk
from cntk.layers import Convolution2D, MaxPooling, Dense
from common.params import *
from common.utils import *

In [2]:
# Record the library versions this notebook was executed with
for lib in (np, cntk):
    print(lib.__version__)

1.13.1
2.0


In [3]:
def create_model():
    """Assemble the convolutional network on top of the global `features` input.

    Layout: two stages of (5, 5) convolution followed by (2, 2) max-pooling
    with stride (2, 2) and no padding (20 filters, then 50), a 500-unit dense
    layer, and a final dense layer of N_CLASSES units with no activation.
    Inside the `default_options` scope every layer defaults to Glorot-uniform
    initialisation and tanh activation unless overridden.

    Returns:
        The output of the final Dense layer (softmax is applied later by the
        loss function and at prediction time, not here).
    """
    # TODO (carried over from the original author): fix all init distributions
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.tanh):
        conv1 = Convolution2D((5, 5), 20)(features)
        pool1 = MaxPooling((2, 2), strides=(2, 2), pad=False)(conv1)
        conv2 = Convolution2D((5, 5), 50)(pool1)
        pool2 = MaxPooling((2, 2), strides=(2, 2), pad=False)(conv2)
        hidden = Dense(500)(pool2)
        # Final layer overrides the default tanh so raw class scores come out
        return Dense(N_CLASSES, activation=None)(hidden)

In [4]:
def init_model(m):
    """Wrap the network `m` in a CNTK trainer driven by momentum SGD.

    Args:
        m: Network output node (as returned by `create_model`). It is scored
            against the global `labels` placeholder.

    Returns:
        A `cntk.Trainer` that optimises softmax cross-entropy and also tracks
        the classification error, using the global LR and MOMENTUM settings.
    """
    # (loss, metric) pair handed to the trainer
    criterion = (
        cntk.cross_entropy_with_softmax(m, labels),
        cntk.classification_error(m, labels),
    )
    # Open question from the original author: where to say "no weight decay,
    # no Nesterov". NOTE(review): the CNTK learner docs appear to expose these
    # as l1/l2_regularization_weight (default 0) and a separate
    # `cntk.nesterov` learner - confirm against the installed version.
    learner = cntk.momentum_sgd(
        m.parameters,
        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
        momentum=cntk.momentum_schedule(MOMENTUM),
    )
    return cntk.Trainer(m, criterion, [learner])

In [5]:
%%time
# Load MNIST in channel-first layout with one-hot encoded labels
x_train, x_test, y_train, y_test = mnist_for_library(channel_first=True, one_hot=True)
# Cast the labels to float32 so they match the dtype of the `labels` input variable
y_train, y_test = (onehot.astype(np.float32) for onehot in (y_train, y_test))

CPU times: user 204 ms, sys: 248 ms, total: 452 ms
Wall time: 455 ms


In [6]:
# Symbolic network inputs: a (1, 28, 28) image and an N_CLASSES-long label vector
features = cntk.input_variable(shape=(1, 28, 28), dtype=np.float32)
labels = cntk.input_variable(shape=N_CLASSES, dtype=np.float32)

In [7]:
%%time
# Initialise model: build the network graph on top of the `features` placeholder
output = create_model()

CPU times: user 8 ms, sys: 52 ms, total: 60 ms
Wall time: 63.6 ms


In [8]:
%%time
# Attach the loss, the error metric and the momentum-SGD learner to the network
trainer = init_model(output)

CPU times: user 8 ms, sys: 172 ms, total: 180 ms
Wall time: 192 ms


In [9]:
%%time
# Train model: one full pass over the training minibatches per epoch
for epoch in range(1, EPOCHS + 1):
    for data, label in yield_mb(x_train, y_train, BATCHSIZE):
        feed = {features: data, labels: label}
        trainer.train_minibatch(feed)
    # Log. These are the trainer's previous_minibatch_* values, i.e. they
    # describe the last minibatch of the epoch, not an epoch-wide average.
    training_loss = trainer.previous_minibatch_loss_average
    eval_error = trainer.previous_minibatch_evaluation_average
    print("Epoch %d  |  Loss: %.6f  |  Error: %.6f" % (epoch, training_loss, eval_error))

Epoch 1  |  Loss: 0.218536  |  Error: 0.062500
Epoch 2  |  Loss: 0.100161  |  Error: 0.031250
Epoch 3  |  Loss: 0.059646  |  Error: 0.031250
Epoch 4  |  Loss: 0.046148  |  Error: 0.031250
Epoch 5  |  Loss: 0.040737  |  Error: 0.031250
Epoch 6  |  Loss: 0.037922  |  Error: 0.031250
Epoch 7  |  Loss: 0.035120  |  Error: 0.031250
Epoch 8  |  Loss: 0.032022  |  Error: 0.031250
Epoch 9  |  Loss: 0.028396  |  Error: 0.031250
Epoch 10  |  Loss: 0.025466  |  Error: 0.031250
Epoch 11  |  Loss: 0.022921  |  Error: 0.000000
Epoch 12  |  Loss: 0.020539  |  Error: 0.000000
CPU times: user 58.7 s, sys: 9.16 s, total: 1min 7s
Wall time: 1min 9s


In [10]:
%%time
# Predict and then score accuracy
z = cntk.softmax(output)
# Builtin `int` replaces `np.int`, which was only an alias for it and has been
# removed from recent NumPy releases.
y_guess = np.zeros(y_test.shape[0], dtype=int)
y_truth = np.zeros(y_test.shape[0], dtype=int)

# Count the samples actually scored instead of assuming every batch is full
# and that every test sample is yielded.
n_scored = 0
for data, label in yield_mb(x_test, y_test, BATCHSIZE):
    predicted_label_probs = z.eval({features: data})
    stop = n_scored + len(label)
    y_guess[n_scored:stop] = np.argmax(predicted_label_probs, axis=-1)
    y_truth[n_scored:stop] = np.argmax(label, axis=-1)
    n_scored = stop

# Drop any slots that were never filled. NOTE(review): if `yield_mb` emits
# only complete batches, the trailing samples would otherwise stay 0 in both
# arrays and be counted as correct predictions - confirm against yield_mb.
y_guess = y_guess[:n_scored]
y_truth = y_truth[:n_scored]

CPU times: user 320 ms, sys: 132 ms, total: 452 ms
Wall time: 468 ms


In [11]:
# Fraction of test predictions that match the true label
matches = y_guess == y_truth
n_correct = sum(matches)
print("Accuracy: ", n_correct / len(y_guess))

Accuracy:  0.9866
