In [2]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.python.keras.datasets import mnist
from tensorflow.contrib.eager.python import tfe

  from ._conv import register_converters as _register_converters


In [3]:
# enable eager mode
# Switches TensorFlow to eager execution; later cells rely on this to call `.numpy()`
# directly on tensors.
# NOTE(review): presumably this has to run before any other TensorFlow op is created - confirm.
tf.enable_eager_execution()
# Fix both the TensorFlow and the NumPy random seeds to 0 so repeated runs start from
# the same random state.
tf.set_random_seed(0)
np.random.seed(0)

In [4]:
# constants
hidden_dim = 500   # number of units in the MLP's hidden layer
batch_size = 128   # samples per gradient step / evaluation batch
epochs = 10        # full passes over the training set
num_classes = 10   # depth of the one-hot labels and width of the output layer

# Make sure the checkpoint root directory exists. `exist_ok=True` makes this cell
# idempotent and avoids the check-then-create race of `os.path.exists` + `os.makedirs`.
os.makedirs('weights/', exist_ok=True)

In [5]:
# Load MNIST as (images, integer labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast to float32, divide by 255 and then flatten every image into a single
# 28 * 28 = 784 element row vector.
x_train = (x_train.astype('float32') / 255.).reshape(-1, 28 * 28)
x_test = (x_test.astype('float32') / 255.).reshape(-1, 28 * 28)

# One-hot encode the integer labels. The resulting tensors are converted straight back
# to numpy arrays because keras cannot take a mix of numpy arrays and tensors as input.
y_train_ohe = tf.one_hot(y_train, num_classes).numpy()
y_test_ohe = tf.one_hot(y_test, num_classes).numpy()

# Report the final array shapes as a sanity check.
print('x train', x_train.shape)
print('y train', y_train_ohe.shape)
print('x test', x_test.shape)
print('y test', y_test_ohe.shape)

x train (60000, 784)
y train (60000, 10)
x test (10000, 784)
y test (10000, 10)


# Multi Layered Perceptron

Extremely simple to build, yet powerful enough for MNIST. It easily reaches 95% test accuracy in fewer than 5 epochs. Then again, MNIST is not a particularly challenging dataset.

Something to note: the activation of each intermediate layer can be specified directly when creating the layer (via the `activation` argument). Don't bother with `tf.keras.layers.Activation()` unless you are building a `Conv-BatchNorm-Relu` block, which will be shown later.

In [6]:
# model definition (canonical way)
class MLP(tf.keras.Model):
    """Multi layered perceptron: one ReLU hidden layer followed by a softmax classifier.

    Args:
        hidden_units: number of units in the hidden `Dense` layer.
        num_classes: number of units in the output `Dense` layer.
    """

    def __init__(self, hidden_units, num_classes):
        super().__init__()

        # The activation is attached to the hidden layer directly; the classifier
        # layer has no activation because softmax is applied separately in `call`.
        self.hidden = tf.keras.layers.Dense(hidden_units, activation='relu')
        self.classifier = tf.keras.layers.Dense(num_classes)

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass and return the softmax of the classifier output.

        `training` and `mask` are accepted for interface compatibility but are not
        used by this model.
        """
        logits = self.classifier(self.hidden(inputs))

        # The softmax is always placed on the CPU. Original author's note: the softmax
        # op does not exist on the gpu.
        # NOTE(review): confirm this is still true for the TensorFlow version in use.
        with tf.device('/cpu:0'):
            return tf.nn.softmax(logits)

In [7]:
# Use the first GPU when at least one is available, otherwise fall back to the CPU.
device = '/gpu:0' if tfe.num_gpus() > 0 else '/cpu:0'

with tf.device(device):
    # Build the model and attach plain SGD (learning rate 0.1) with a
    # categorical cross-entropy loss, tracking accuracy.
    model = MLP(hidden_dim, num_classes)
    model.compile(
        optimizer=tf.train.GradientDescentOptimizer(0.1),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Without this step TF Keras tries to use the entire dataset to determine shapes
    # when calling .fit(). Workaround: feed exactly one dummy sample so the model can
    # determine its input/output shape/s up front.
    dummy_x = tf.zeros((1, 28 * 28))
    model._set_inputs(dummy_x)

    # Train, validating on the test split after every epoch.
    model.fit(
        x_train,
        y_train_ohe,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_test, y_test_ohe),
        verbose=2,
    )

    # Final evaluation on the test set.
    scores = model.evaluate(x_test, y_test_ohe, batch_size=batch_size, verbose=2)
    print("Final test loss and accuracy :", scores)


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 6s - loss: 0.4373 - acc: 0.8836 - val_loss: 0.2715 - val_acc: 0.9245
Epoch 2/10
 - 6s - loss: 0.2462 - acc: 0.9314 - val_loss: 0.2105 - val_acc: 0.9406
Epoch 3/10
 - 6s - loss: 0.1967 - acc: 0.9451 - val_loss: 0.1797 - val_acc: 0.9485
Epoch 4/10
 - 6s - loss: 0.1647 - acc: 0.9538 - val_loss: 0.1510 - val_acc: 0.9564
Epoch 5/10
 - 6s - loss: 0.1414 - acc: 0.9604 - val_loss: 0.1390 - val_acc: 0.9607
Epoch 6/10
 - 6s - loss: 0.1235 - acc: 0.9661 - val_loss: 0.1234 - val_acc: 0.9653
Epoch 7/10
 - 5s - loss: 0.1098 - acc: 0.9696 - val_loss: 0.1128 - val_acc: 0.9675
Epoch 8/10
 - 5s - loss: 0.0984 - acc: 0.9729 - val_loss: 0.1051 - val_acc: 0.9691
Epoch 9/10
 - 5s - loss: 0.0890 - acc: 0.9759 - val_loss: 0.1000 - val_acc: 0.9722
Epoch 10/10
 - 6s - loss: 0.0814 - acc: 0.9778 - val_loss: 0.0920 - val_acc: 0.9740
Final test loss and accuracy : [0.09196617434620857, 0.974]


# Saving weights
Saving weights for Eager models which are built solely with Keras Layers or Keras Models is super simple.

I haven't tried mixing the `tf.layers` API with Eager `tf.keras`. Something to try once the API matures.

Although saving models is easy, there are several pain points with restoring them, which are discussed in the final tutorial - `10_custom_models.ipynb`.

In [9]:
# Save the weights of the model. This is same for all models defined after this.
checkpoint_prefix = 'weights/03_feedforward/weights.ckpt'

# Only 'weights/' is created earlier in the notebook; the saver can fail when the
# checkpoint's parent directory is missing, so create it (idempotently) before saving.
os.makedirs(os.path.dirname(checkpoint_prefix), exist_ok=True)

saver = tfe.Saver(model.variables)
saver.save(checkpoint_prefix)

'weights/03_feedforward/weights.ckpt'