In [42]:
import os
import numpy as np
from sklearn.datasets        import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing   import label_binarize

# Inclusive value ranges: grayscale pixel intensities span 0..255 and
# digit class labels span 0..9.
PIXEL_RANGE = range(256)
IMAGE_RANGE = range(10)

def process(mnist):
    """Convert a raw MNIST bunch into CNN-ready feature and label arrays.

    Args:
        mnist: object exposing ``data`` (pixel values) and ``target``
            (digit labels) attributes.

    Returns:
        A ``(features, labels)`` tuple of float32 arrays. Features are
        pixel values divided by the top of ``PIXEL_RANGE`` and reshaped to
        ``(-1, 1, 28, 28)``; labels are binarized over ``IMAGE_RANGE`` and
        reshaped to ``(-1, 10)``.
    """
    # Scale pixel intensities by the maximum pixel value, then lay each
    # sample out as a single-channel 28x28 tensor.
    scaled_pixels = np.divide(mnist.data, PIXEL_RANGE[-1]).astype(np.float32)
    image_tensors = scaled_pixels.reshape((-1, 1, 28, 28))

    # One-hot encode the digit labels, one column per class.
    one_hot = label_binarize(mnist.target, classes=IMAGE_RANGE).astype(np.float32)
    label_tensors = one_hot.reshape((-1, 10))

    return image_tensors, label_tensors


# Download the MNIST dataset (cached under the current directory).
# NOTE(review): fetch_mldata is believed to be deprecated/removed in newer
# scikit-learn releases -- confirm against the installed version.
mnist            = fetch_mldata('MNIST original', data_home='.')
features, labels = process(mnist)

# Split train and test data. A fixed seed keeps the shuffle -- and therefore
# every downstream training result -- reproducible across kernel restarts.
SPLIT_SEED = 42
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, random_state=SPLIT_SEED
)

# The split must partition the dataset without losing or duplicating samples.
assert len(train_features) + len(test_features) == len(features)

# Display feature and label shapes
print('features:', features.shape, 'labels:', labels.shape)

features: (70000, 1, 28, 28) labels: (70000, 10)


In [43]:
# Import CNTK
import cntk as c

# Show the library name (upper-cased), its version and the first listed device
'{} {} {}'.format(
    c.__name__.upper(),
    c.__version__,
    str(c.device.all_devices()[0])
)

'CNTK 2.0 CPU'

In [52]:
class ConvNet:
    """Small convolutional classifier for MNIST digits built on CNTK.

    Constructing an instance builds the input variables, the layer stack,
    the loss/error criteria and an SGD trainer. The instance can be used as
    a context manager; entering returns the instance and exiting does
    nothing.

    Note that an "epoch" here consumes exactly one slice of ``batch_size``
    samples (see ``batch``), not a full pass over the data.
    """
    IMAGE_SHAPE         = (1, 28, 28)
    IMAGE_CLASSES       = list(IMAGE_RANGE)
    IMAGE_CLASS_COUNT   = len(IMAGE_CLASSES)
    # One learning-rate entry per default epoch (4 * 20 == 80).
    LEARNING_RATE       = [0.2] * 20 + [0.1] * 20 + [0.0001] * 20 + [0.00001] * 20
    DEFAULT_EPOCH_COUNT = 80
    DEFAULT_BATCH_SIZE  = 512

    def __init__(self, epoch_count=DEFAULT_EPOCH_COUNT, batch_size=DEFAULT_BATCH_SIZE):
        """Store the run configuration and build the network and trainer.

        Args:
            epoch_count: number of iterations ``evaluate`` performs.
            batch_size: number of samples in each slice fed per iteration.
        """
        self.epoch_count = epoch_count
        self.batch_size  = batch_size
        self._build()

    def evaluate(self, fn, feeds):
        """Call ``fn(epoch, batch)`` once per epoch with that epoch's slice.

        Args:
            fn: callable taking the epoch index and the sliced feed dict.
            feeds: dict mapping input names to full, sliceable datasets.
        """
        for epoch in range(self.epoch_count):
            batch = self.batch(epoch, feeds)
            fn(epoch, batch)

    def train(self, data):
        """Run one training step on ``data``; returns the trainer's result."""
        return self.trainer.train_minibatch(data)

    def test(self, data):
        """Evaluate ``data`` without updating weights.

        Returns:
            Whatever ``trainer.test_minibatch`` returns, so callers can
            actually inspect the test metric (previously it was discarded).
        """
        return self.trainer.test_minibatch(data)

    def log_progress(self):
        """Ask the trainer to print its training-progress summary."""
        self.trainer.summarize_training_progress()

    def log_parameters(self):
        """Log the model's parameter count, followed by a blank line."""
        c.logging.log_number_of_parameters(self.stack)
        print()

    def save(self, version):
        """Save the model to the checkpoint file for ``version``.

        The checkpoint directory is created first if it does not exist, so
        saving works from a fresh working directory.
        """
        path = self._to_file(version)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        self.stack.save(path)

    def restore(self, version):
        """Restore the model from the checkpoint file for ``version``."""
        self.stack.restore(self._to_file(version))

    def batch(self, epoch, data):
        """Slice every dataset in ``data`` down to the window for ``epoch``.

        The window is ``[epoch * batch_size, (epoch + 1) * batch_size)``.
        Slices that run past the end of a dataset come back shorter, or
        empty, following normal slicing rules.

        Args:
            epoch: zero-based iteration index selecting the window.
            data: dict mapping names to sliceable datasets.

        Returns:
            A new dict with the same keys and the sliced values.
        """
        def chunk(data):
            slice_begin = epoch * self.batch_size
            slice_end   = slice_begin + self.batch_size
            return data[slice_begin:slice_end]
        return {key: chunk(value) for key, value in data.items()}

    def _to_file(self, name):
        """Return ``./checkpoints/<ClassName>_MNIST_<name>.dnn``."""
        file_names = [
            self.__class__.__name__, 'MNIST', '{}.dnn'.format(name)
        ]
        return os.path.join(
            '.', 'checkpoints', '_'.join(file_names)
        )

    def _build(self):
        """Build inputs, layers, the composed stack and the trainer, in order."""
        self._build_inputs()
        self._build_layers()
        self._build_stack()
        self._build_trainer()

    def _build_inputs(self):
        """Declare the float32 ``features`` and ``labels`` input variables."""
        self.features = c.input_variable(self.IMAGE_SHAPE,       np.float32, name='features')
        self.labels   = c.input_variable(self.IMAGE_CLASS_COUNT, np.float32, name='labels')

    def _build_layers(self):
        """Create the ordered list of layers (ReLU and no padding by default)."""
        with c.layers.default_options(activation=c.ops.relu, pad=False):
            self.layers  = [
                c.layers.Convolution2D((5,5), 32, pad=True),
                c.layers.MaxPooling((3,3), (2,2)),
                c.layers.Convolution2D((3,3), 48),
                c.layers.MaxPooling((3,3), (2,2)),
                c.layers.Convolution2D((3,3), 64),
                c.layers.Dense(96),
                # NOTE(review): the positional argument is presumably the
                # drop rate, i.e. 80% of activations dropped -- confirm that
                # a keep probability of 0.8 was not what was intended.
                c.layers.Dropout(0.8),
                # Final layer emits raw scores; softmax is applied in the loss.
                c.layers.Dense(self.IMAGE_CLASS_COUNT, activation=None)
            ]

    def _build_stack(self):
        """Chain the layers onto the feature input and attach loss and error."""
        self.stack = self.features
        for layer in self.layers:
            self.stack = layer(self.stack)
        self.loss  = c.losses.cross_entropy_with_softmax(self.stack, self.labels)
        self.error = c.metrics.classification_error(self.stack, self.labels)

    def _build_trainer(self):
        """Create the SGD learner, progress printer and trainer."""
        # NOTE(review): the schedule is created without an epoch_size. When
        # raw arrays are fed, the later LEARNING_RATE entries may never take
        # effect -- confirm against the CNTK schedule documentation.
        schedule     = c.learning_rate_schedule(self.LEARNING_RATE, c.UnitType.minibatch)
        learner      = c.learners.sgd(self.stack.parameters, schedule)
        printer      = c.logging.ProgressPrinter(tag='Training', num_epochs=self.epoch_count)
        self.trainer = c.Trainer(self.stack, (self.loss, self.error), learner, printer)

    def __enter__(self):
        """Enter the context; returns this instance."""
        return self

    def __exit__(self, type, value, traceback):
        """Leave the context; nothing to release, exceptions propagate."""
        return False


In [55]:
with ConvNet() as cnn:
    def train(epoch, batch_map):
        # One step: fit this epoch's slice, then print the progress summary.
        cnn.train(batch_map)
        cnn.log_progress()

    # Report the model size before training starts.
    cnn.log_parameters()

    # Feed keys match the names given to the network's input variables.
    cnn.evaluate(
        fn=train,
        feeds={'features': train_features, 'labels': train_labels},
    )

    # Persist the trained model under the 'train' checkpoint name.
    cnn.save('train')

Training 98778 parameters in 10 parameter tensors.

Learning rate per minibatch: 0.2
Finished Epoch[1 of 80]: [Training] loss = 2.338234 * 512, metric = 88.28% * 512 1.585s (323.0 samples/s);
Finished Epoch[2 of 80]: [Training] loss = 2.317653 * 512, metric = 90.23% * 512 0.932s (549.4 samples/s);
Finished Epoch[3 of 80]: [Training] loss = 2.304480 * 512, metric = 88.87% * 512 0.925s (553.5 samples/s);
Finished Epoch[4 of 80]: [Training] loss = 2.288018 * 512, metric = 86.72% * 512 0.971s (527.3 samples/s);
Finished Epoch[5 of 80]: [Training] loss = 2.265363 * 512, metric = 83.79% * 512 0.927s (552.3 samples/s);
Finished Epoch[6 of 80]: [Training] loss = 2.282706 * 512, metric = 87.70% * 512 0.905s (565.7 samples/s);
Finished Epoch[7 of 80]: [Training] loss = 2.268359 * 512, metric = 84.57% * 512 1.314s (389.6 samples/s);
Finished Epoch[8 of 80]: [Training] loss = 2.259836 * 512, metric = 86.33% * 512 0.997s (513.5 samples/s);
Finished Epoch[9 of 80]: [Training] loss = 2.255026 * 512, 

Finished Epoch[77 of 80]: [Training] loss = 0.663016 * 512, metric = 22.07% * 512 0.947s (540.7 samples/s);
Finished Epoch[78 of 80]: [Training] loss = 0.661281 * 512, metric = 23.63% * 512 0.919s (557.1 samples/s);
Finished Epoch[79 of 80]: [Training] loss = 0.680322 * 512, metric = 22.66% * 512 0.901s (568.3 samples/s);
Finished Epoch[80 of 80]: [Training] loss = 0.684739 * 512, metric = 21.68% * 512 0.904s (566.4 samples/s);
