In [42]:
import os
import numpy as np
from sklearn.datasets        import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing   import label_binarize

PIXEL_RANGE = range(0, 255 + 1)
IMAGE_RANGE = range(0, 9 + 1)

def process(mnist):
    # Process dataset to return features and labels for CNN
    def features():
        # Transform features to be float32 sets of 1x28x28 
        # tensors with normalized pixel values.
        return np.divide(
            mnist.data, PIXEL_RANGE[-1]
        ).astype(np.float32).reshape((-1, 1, 28, 28))
    def labels():
        # Transform labels to be float32 sets of 1x10
        # tensors that are one-hot encoded.
        return label_binarize(
            mnist.target, classes=IMAGE_RANGE
        ).astype(np.float32).reshape((-1, 10))
    return features(), labels()


# Download the MNIST dataset
mnist            = fetch_mldata('MNIST original', data_home='.')
features, labels = process(mnist)

# Split train and test data
train_features, test_features, train_labels, test_labels = train_test_split(features, labels)

# Display feature and label shapes
print('features:', features.shape, 'labels:', labels.shape)

features: (70000, 1, 28, 28) labels: (70000, 10)


In [43]:
# Import CNTK
import cntk as c

# Display CNTK version
' '.join([
    c.__name__.upper(),
    c.__version__,
    str(c.device.all_devices()[0])
])

'CNTK 2.0 CPU'

In [60]:
class ConvNet():
    IMAGE_SHAPE         = (1, 28, 28)
    IMAGE_CLASSES       = [n for n in IMAGE_RANGE]
    IMAGE_CLASS_COUNT   = len(IMAGE_CLASSES)
    LEARNING_RATE       = [0.2] * 20 + [0.1] * 20 + [0.0001] * 20 + [0.00001] * 20
    DEFAULT_EPOCH_COUNT = 80
    DEFAULT_BATCH_SIZE  = 512
    
    def __init__(self,
                 epoch_count=DEFAULT_EPOCH_COUNT,
                 batch_size=DEFAULT_BATCH_SIZE):
        self.epoch_count = epoch_count
        self.batch_size  = batch_size
        self._build()
    
    def evaluate(self, fn, feeds):
        # Evaluate a function for each epoch with minibatched 
        # feed data. Used for training and testing the model.
        for epoch in range(self.epoch_count):
            batch = self.batch(epoch, feeds)
            fn(epoch, batch)
    
    def train(self, data):
        self.trainer.train_minibatch(data)
    
    def test(self, data):
        self.trainer.test_minibatch(data)
    
    def log_train_progress(self):
        self.trainer.summarize_training_progress()
    
    def log_test_progress(self):
        self.trainer.summarize_test_progress()
    
    def log_parameters(self):
        c.logging.log_number_of_parameters(self.stack)
        print()
    
    def save(self, version):
        self.stack.save(self._to_file(version))
    
    def restore(self, version):
        self.stack.restore(self._to_file(version))
    
    def batch(self, epoch, data):
        # Slice data into batch chunks based on the current epoch
        def chunk(data):
            slice_begin = epoch * self.batch_size
            slice_end   = slice_begin + self.batch_size
            return data[slice_begin:slice_end]
        return {key: chunk(value) for key, value in data.items()}
    
    def _to_file(self, name):
        file_names = [
            self.__class__.__name__, 'MNIST', '{}.dnn'.format(name)
        ]
        return os.path.join(
            '.', 'checkpoints', '_'.join(file_names)
        )
    
    def _build(self):
        self._build_inputs()
        self._build_layers()
        self._build_stack()
        self._build_trainer()
    
    def _build_inputs(self):
        # Define input variables of neural net
        self.features = c.input_variable(self.IMAGE_SHAPE,       np.float32, name='features')
        self.labels   = c.input_variable(self.IMAGE_CLASS_COUNT, np.float32, name='labels')
    
    def _build_layers(self):
        # Define neural net layers
        with c.layers.default_options(activation=c.ops.relu, pad=False):
            self.layers  = [
                c.layers.Convolution2D((5,5), 32, pad=True),
                c.layers.MaxPooling((3,3), (2,2)),
                c.layers.Convolution2D((3,3), 48),
                c.layers.MaxPooling((3,3), (2,2)),
                c.layers.Convolution2D((3,3), 64),
                c.layers.Dense(96),
                c.layers.Dropout(0.8),
                c.layers.Dense(self.IMAGE_CLASS_COUNT, activation=None)
            ]
    
    def _build_stack(self):
        # Stack neural net layers into model
        self.stack = self.features
        for layer in self.layers: self.stack = layer(self.stack)
        self.loss  = c.losses.cross_entropy_with_softmax(self.stack, self.labels)
        self.error = c.metrics.classification_error(self.stack, self.labels)
    
    def _build_trainer(self):
        # Define neural net trainer
        schedule     = c.learning_rate_schedule(self.LEARNING_RATE, c.UnitType.minibatch)
        learner      = c.learners.sgd(self.stack.parameters, schedule)
        printer      = c.logging.ProgressPrinter(tag='Training', num_epochs=self.epoch_count)
        self.trainer = c.Trainer(self.stack, (self.loss, self.error), learner, printer)
    
    def __enter__(self):
        return self
    
    def __exit__(self, type, value, traceback):
        pass


In [61]:
with ConvNet() as cnn:
    # Train the model and save state to a checkpoint file
    def train(epoch, batch_map):
        cnn.train(batch_map)
        cnn.log_train_progress()
    cnn.log_parameters()
    cnn.evaluate(train, {'features': train_features, 'labels': train_labels})
    cnn.save('train')

Training 98778 parameters in 10 parameter tensors.

Learning rate per minibatch: 0.2
Finished Epoch[1 of 80]: [Training] loss = 2.313337 * 512, metric = 89.06% * 512 2.460s (208.1 samples/s);
Finished Epoch[2 of 80]: [Training] loss = 2.299743 * 512, metric = 88.48% * 512 0.902s (567.6 samples/s);
Finished Epoch[3 of 80]: [Training] loss = 2.302219 * 512, metric = 88.28% * 512 1.215s (421.4 samples/s);
Finished Epoch[4 of 80]: [Training] loss = 2.276180 * 512, metric = 87.89% * 512 0.972s (526.7 samples/s);
Finished Epoch[5 of 80]: [Training] loss = 2.281023 * 512, metric = 86.72% * 512 1.807s (283.3 samples/s);
Finished Epoch[6 of 80]: [Training] loss = 2.274911 * 512, metric = 85.74% * 512 1.533s (334.0 samples/s);
Finished Epoch[7 of 80]: [Training] loss = 2.274856 * 512, metric = 87.11% * 512 1.536s (333.3 samples/s);
Finished Epoch[8 of 80]: [Training] loss = 2.258026 * 512, metric = 80.86% * 512 1.086s (471.5 samples/s);
Finished Epoch[9 of 80]: [Training] loss = 2.248255 * 512, 

Finished Epoch[77 of 80]: [Training] loss = 0.758766 * 512, metric = 24.02% * 512 1.210s (423.1 samples/s);
Finished Epoch[78 of 80]: [Training] loss = 0.795428 * 512, metric = 27.73% * 512 1.009s (507.4 samples/s);
Finished Epoch[79 of 80]: [Training] loss = 0.722301 * 512, metric = 27.15% * 512 0.975s (525.1 samples/s);
Finished Epoch[80 of 80]: [Training] loss = 0.623668 * 512, metric = 19.92% * 512 0.927s (552.3 samples/s);


In [64]:
with ConvNet(epoch_count=5) as cnn:
    # Restore state from a checkpoint file and test the model 
    def test(epoch, batch_map):
        cnn.test(batch_map)
        cnn.log_test_progress()
    cnn.log_parameters()
    cnn.restore('train')
    cnn.evaluate(test, {'features': train_features, 'labels': train_labels})

Training 98778 parameters in 10 parameter tensors.

Finished Evaluation [1]: Minibatch[1-1]: metric = 7.81% * 512;
Finished Evaluation [2]: Minibatch[1-1]: metric = 8.20% * 512;
Finished Evaluation [3]: Minibatch[1-1]: metric = 8.01% * 512;
Finished Evaluation [4]: Minibatch[1-1]: metric = 9.38% * 512;
Finished Evaluation [5]: Minibatch[1-1]: metric = 9.96% * 512;
