In [42]:
import os
import numpy as np
from sklearn.datasets        import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing   import label_binarize

# Valid grey levels of a pixel (0..255 inclusive) and valid digit classes (0..9 inclusive).
PIXEL_RANGE = range(0, 255 + 1)
IMAGE_RANGE = range(0, 9 + 1)

def process(mnist):
    """Turn a loaded MNIST dataset into CNN-ready features and labels.

    Args:
        mnist: object exposing ``data`` (pixel values) and ``target``
            (digit labels) attributes.

    Returns:
        A ``(features, labels)`` tuple of float32 arrays:
        features has shape (-1, 1, 28, 28) with pixels divided by the
        largest value in PIXEL_RANGE; labels has shape (-1, 10) with one
        column per class in IMAGE_RANGE.
    """
    # Features: scale pixels by the maximum grey level, then lay each
    # sample out as a single-channel 28x28 tensor.
    max_pixel = PIXEL_RANGE[-1]
    scaled    = np.divide(mnist.data, max_pixel)
    images    = scaled.astype(np.float32).reshape((-1, 1, 28, 28))

    # Labels: one indicator column per digit class, stored as float32.
    indicators = label_binarize(mnist.target, classes=IMAGE_RANGE)
    targets    = indicators.astype(np.float32).reshape((-1, 10))

    return images, targets


# Download the MNIST dataset (cached under the current directory)
# NOTE(review): fetch_mldata is deprecated/removed in newer scikit-learn
# releases; this cell presumably needs an older release or a port to
# fetch_openml -- confirm against the installed version.
mnist            = fetch_mldata('MNIST original', data_home='.')
features, labels = process(mnist)

# Split train and test data. The split shuffles the samples, so pin the seed:
# without it every kernel restart yields a different partition and the
# training numbers further down cannot be reproduced.
SPLIT_SEED = 42
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, random_state=SPLIT_SEED
)

# Display feature and label shapes
print('features:', features.shape, 'labels:', labels.shape)

features: (70000, 1, 28, 28) labels: (70000, 10)


In [43]:
# Import CNTK
import cntk as c

# Show the CNTK package name, its version and the first device it reports
'{} {} {}'.format(
    c.__name__.upper(),
    c.__version__,
    str(c.device.all_devices()[0]),
)

'CNTK 2.0 CPU'

In [44]:
class ConvNet:
    """Convolutional MNIST classifier assembled from CNTK layers.

    Constructing an instance builds the input variables, the layer list,
    the composed network with its loss/error functions, and an SGD trainer.
    Training is driven from outside through ``evaluate`` + ``train``.
    """

    IMAGE_SHAPE       = (1, 28, 28)
    IMAGE_CLASSES     = list(IMAGE_RANGE)
    IMAGE_CLASS_COUNT = len(IMAGE_CLASSES)
    # One learning-rate entry per step of the EPOCH_COUNT loop (4 * 20 = 80).
    LEARNING_RATE     = [0.2] * 20 + [0.1] * 20 + [0.0001] * 20 + [0.00001] * 20
    EPOCH_COUNT       = 80
    BATCH_SIZE        = 512
    # Directory that ``checkpoint`` writes model files into.
    CHECKPOINT_DIR    = os.path.join('.', 'checkpoints')

    def __init__(self):
        self._build_inputs()
        self._build_layers()
        self._build_stack()
        self._build_trainer()

    def evaluate(self, fn, feeds):
        """Call ``fn(epoch, batch)`` once for each of EPOCH_COUNT steps.

        Args:
            fn: callable taking the step index and the batch mapping.
            feeds: mapping of name -> sliceable dataset; each step receives
                the BATCH_SIZE-long slice selected by ``batch``.
        """
        for epoch in range(self.EPOCH_COUNT):
            batch = self.batch(epoch, feeds)
            fn(epoch, batch)

    def train(self, data):
        """Run one trainer update on the given minibatch mapping."""
        self.trainer.train_minibatch(data)

    def log_progress(self):
        """Ask the trainer to print its training-progress summary."""
        self.trainer.summarize_training_progress()

    def log_parameters(self):
        """Log the network's parameter count, followed by a blank line."""
        c.logging.log_number_of_parameters(self.stack)
        print()

    def checkpoint(self, version):
        """Save the network as ``<ClassName>_MNIST_<version>.dnn``.

        The file is written under CHECKPOINT_DIR, which is created first if
        it does not exist so that saving cannot fail on a missing directory
        after training has already been paid for.

        Args:
            version: label formatted into the checkpoint file name.
        """
        file_names = [
            self.__class__.__name__, 'MNIST', '{}.dnn'.format(version)
        ]
        os.makedirs(self.CHECKPOINT_DIR, exist_ok=True)
        self.stack.save(
            os.path.join(self.CHECKPOINT_DIR, '_'.join(file_names))
        )

    def batch(self, epoch, data):
        """Slice the ``epoch``-th BATCH_SIZE-long window out of every feed.

        Args:
            epoch: zero-based window index.
            data: mapping of name -> sliceable dataset.

        Returns:
            A dict with the same keys whose values are the slices
            ``[epoch * BATCH_SIZE : (epoch + 1) * BATCH_SIZE]``. A window
            past the end of a dataset comes back shorter or empty, as
            ordinary slicing does.
        """
        slice_begin = epoch * self.BATCH_SIZE
        slice_end   = slice_begin + self.BATCH_SIZE
        return {key: value[slice_begin:slice_end] for key, value in data.items()}

    def _build_inputs(self):
        """Declare the float32 input variables for images and labels."""
        self.features = c.input_variable(self.IMAGE_SHAPE,       np.float32, name='features')
        self.labels   = c.input_variable(self.IMAGE_CLASS_COUNT, np.float32, name='labels')

    def _build_layers(self):
        """Create the ordered list of layers (ReLU, no padding by default)."""
        with c.layers.default_options(activation=c.ops.relu, pad=False):
            self.layers  = [
                c.layers.Convolution2D((5,5), 32, pad=True),
                c.layers.MaxPooling((3,3), (2,2)),
                c.layers.Convolution2D((3,3), 48),
                c.layers.MaxPooling((3,3), (2,2)),
                c.layers.Convolution2D((3,3), 64),
                c.layers.Dense(96),
                # NOTE(review): CNTK's Dropout appears to take the fraction
                # of activations to drop, so 0.8 would discard 80% of them --
                # confirm this was not meant as a keep probability.
                c.layers.Dropout(0.8),
                # No activation on the output layer: the loss below applies
                # softmax itself.
                c.layers.Dense(self.IMAGE_CLASS_COUNT, activation=None)
            ]

    def _build_stack(self):
        """Chain the layers onto the feature input and attach loss/error."""
        self.stack = self.features
        for layer in self.layers:
            self.stack = layer(self.stack)
        self.loss  = c.losses.cross_entropy_with_softmax(self.stack, self.labels)
        self.error = c.metrics.classification_error(self.stack, self.labels)

    def _build_trainer(self):
        """Create the SGD learner, progress printer and trainer."""
        # NOTE(review): no epoch_size is passed, so how long each
        # LEARNING_RATE entry stays active is left to CNTK's default
        # scheduling unit -- confirm the steps actually advance once per
        # minibatch as the 80-entry list suggests.
        schedule     = c.learning_rate_schedule(self.LEARNING_RATE, c.UnitType.minibatch)
        learner      = c.learners.sgd(self.stack.parameters, schedule)
        printer      = c.logging.ProgressPrinter(tag='Training', num_epochs=self.EPOCH_COUNT)
        self.trainer = c.Trainer(self.stack, (self.loss, self.error), learner, printer)


In [45]:
def train(epoch, batch_map):
    """Per-step callback: train on one minibatch, then log progress."""
    cnn.train(batch_map)
    cnn.log_progress()

# Build the network, report its size, run the training loop over the
# training split and save the resulting model.
cnn = ConvNet()
cnn.log_parameters()
cnn.evaluate(train, dict(features=train_features, labels=train_labels))
cnn.checkpoint('train')

Training 98778 parameters in 10 parameter tensors.

Learning rate per minibatch: 0.2
Finished Epoch[1 of 80]: [Training] loss = 2.335645 * 512, metric = 90.43% * 512 3.056s (167.5 samples/s);
Finished Epoch[2 of 80]: [Training] loss = 2.299878 * 512, metric = 89.26% * 512 1.133s (451.9 samples/s);
Finished Epoch[3 of 80]: [Training] loss = 2.288602 * 512, metric = 86.72% * 512 0.904s (566.4 samples/s);
Finished Epoch[4 of 80]: [Training] loss = 2.279175 * 512, metric = 84.77% * 512 1.049s (488.1 samples/s);
Finished Epoch[5 of 80]: [Training] loss = 2.272207 * 512, metric = 84.96% * 512 1.006s (508.9 samples/s);
Finished Epoch[6 of 80]: [Training] loss = 2.275580 * 512, metric = 84.57% * 512 0.905s (565.7 samples/s);
Finished Epoch[7 of 80]: [Training] loss = 2.259426 * 512, metric = 83.40% * 512 1.824s (280.7 samples/s);
Finished Epoch[8 of 80]: [Training] loss = 2.260013 * 512, metric = 83.20% * 512 1.168s (438.4 samples/s);
Finished Epoch[9 of 80]: [Training] loss = 2.237940 * 512, 

Finished Epoch[77 of 80]: [Training] loss = 0.698961 * 512, metric = 21.09% * 512 0.965s (530.6 samples/s);
Finished Epoch[78 of 80]: [Training] loss = 0.668837 * 512, metric = 20.90% * 512 0.921s (555.9 samples/s);
Finished Epoch[79 of 80]: [Training] loss = 0.664158 * 512, metric = 23.63% * 512 0.913s (560.8 samples/s);
Finished Epoch[80 of 80]: [Training] loss = 0.708523 * 512, metric = 23.83% * 512 0.913s (560.8 samples/s);
