In [1]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.python.keras.datasets import mnist
from tensorflow.contrib.eager.python import tfe

  from ._conv import register_converters as _register_converters


In [2]:
# Switch TensorFlow to eager mode.
tf.enable_eager_execution()

# Fix the NumPy and TensorFlow random seeds to the same value.
random_seed = 0
np.random.seed(random_seed)
tf.set_random_seed(random_seed)

In [3]:
# Create the root directory that checkpoints are written under.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test followed by os.makedirs().
os.makedirs('weights/', exist_ok=True)

# constants
image_size = 28    # images are reshaped to (image_size, image_size, 1)
batch_size = 512   # batch size passed to fit() and evaluate()
epochs = 6         # number of epochs passed to fit()
num_classes = 10   # one-hot depth of the labels and width of the model's final layer

In [4]:
def _prepare_images(images):
    """Cast to float32, divide by 255 and reshape to (-1, image_size, image_size, 1)."""
    scaled = images.astype('float32') / 255.
    return scaled.reshape((-1, image_size, image_size, 1))


def _one_hot(labels):
    """One-hot encode `labels` with TensorFlow and return the result as a NumPy array."""
    return tf.one_hot(labels, depth=num_classes).numpy()


# dataset loading
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = _prepare_images(x_train), _prepare_images(x_test)

# The one-hot labels are converted back to NumPy because we cannot feed Keras
# a combination of NumPy arrays and tensors as input.
y_train_ohe, y_test_ohe = _one_hot(y_train), _one_hot(y_test)

for split_name, split_array in (('x train', x_train), ('y train', y_train_ohe),
                                ('x test', x_test), ('y test', y_test_ohe)):
    print(split_name, split_array.shape)

x train (60000, 28, 28, 1)
y train (60000, 10)
x test (10000, 28, 28, 1)
y test (10000, 10)


# Create a basic Conv-BN-Relu Block

In [5]:
class ConvBNRelu(tf.keras.Model):
    """Conv2D -> BatchNormalization -> ReLU, packaged as a reusable Keras model.

    Args:
        channels: number of output filters of the convolution.
        strides: stride applied along both spatial dimensions.
        kernel: side length of the square convolution kernel.
        padding: padding mode forwarded to `Conv2D`.
    """

    def __init__(self, channels, strides=1, kernel=3, padding='same'):
        super(ConvBNRelu, self).__init__()
        # The convolution carries no bias term; it is followed directly by batch norm.
        self.conv = tf.keras.layers.Conv2D(
            channels,
            (kernel, kernel),
            strides=(strides, strides),
            padding=padding,
            use_bias=False,
            kernel_initializer='he_normal',
        )
        self.bn = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=None, mask=None):
        """Apply the block; `training` is passed to batch norm, `mask` is unused."""
        normalized = self.bn(self.conv(inputs), training=training)
        return tf.nn.relu(normalized)

# Create an "Inception" Module

Let's chain a few branches of Conv-BN-Relu blocks together into a hypercolumn and use that as an "Inception block".

This is an ad hoc model, meant only to show that **Models used as Layers** can be nested to any depth. We will be using these `InceptionBlock` instances as layers inside the `InceptionCIFAR` model, and these blocks themselves hold `ConvBNRelu` blocks.

In [6]:
class InceptionBlock(tf.keras.Model):
    """Four parallel `ConvBNRelu` branches concatenated along the last axis.

    Branches:
        1. a 1x1 convolution
        2. a 3x3 convolution
        3. two stacked 3x3 convolutions (only the first one is strided)
        4. a 3x3 max-pool with stride 1 followed by a 1x1 convolution

    Args:
        channels: number of filters produced by every branch.
        strides: stride applied once in each branch.
    """

    def __init__(self, channels, strides=1):
        super(InceptionBlock, self).__init__()
        self.channels = channels
        self.strides = strides

        # Branch 1: pointwise convolution.
        self.conv1 = ConvBNRelu(channels, strides=strides, kernel=1)
        # Branch 2: single 3x3 convolution.
        self.conv2 = ConvBNRelu(channels, strides=strides, kernel=3)
        # Branch 3: strided 3x3 convolution followed by an unstrided one.
        self.conv3_1 = ConvBNRelu(channels, strides=strides, kernel=3)
        self.conv3_2 = ConvBNRelu(channels, strides=1, kernel=3)
        # Branch 4: the pooling keeps the spatial size; the 1x1 convolution applies the stride.
        self.maxpool = tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')
        self.maxpool_conv = ConvBNRelu(channels, strides=strides, kernel=1)

    def call(self, inputs, training=None, mask=None):
        """Run all four branches on `inputs` and concatenate their outputs; `mask` is unused."""
        branch_1x1 = self.conv1(inputs, training=training)
        branch_3x3 = self.conv2(inputs, training=training)
        branch_double_3x3 = self.conv3_2(self.conv3_1(inputs, training=training),
                                         training=training)
        branch_pool = self.maxpool_conv(self.maxpool(inputs), training=training)

        return tf.keras.layers.concatenate(
            [branch_1x1, branch_3x3, branch_double_3x3, branch_pool], axis=-1)

# Create a Configurable Network

This network is adaptive, in that it can have many layers, and therefore we cannot determine the layers beforehand.

To remedy this, we use the convenient `setattr` (and optionally `getattr`) to dynamically "register" and "call" sublayers.

# Note on why this is needed

Eager Models *will* automatically register all variables that have been bound to an identifier inside that class - 

- Using `self.layer_name = tf.keras.layers.***`
- Using `self.block = ClassWhichInheritsModel(...)`

However, **it will not register variables that have not been bound directly to the class itself or are custom variables.**

- Using `self.layers = [layer1, layer2]`
- Using `self.layers = {'l1':layer1, 'l2':layer2}`
- Using `self.variable = tf.get_variable(...)`

Special case : 

- Using `self.cells = [LSTMCell(), LSTMCell()]` and then wrapping them in an RNN as `self.rnn = RNN(self.cells)` **will work as expected**. The weights of each LSTMCell will be registered, and the RNN itself is registered as well.

**`setattr` and `getattr` bypass the above issues: they set the layers or models on the class itself, so they are registered by Keras.**

# Note 2

This registration of layers is important only for convenience of using Model methods - when using Model.compile(), Model.fit(), Model.predict() and Model.evaluate().

If there is no need for these utilities, you can write the class as you want, extract all the variables in a list, get the gradients using `tf.GradientTape()` and then update the parameters by hand using `Optimizer.apply_gradients()`. In such a scenario, even the **Model._set_inputs(...)** fix need not be applied, since you will be doing batch-level training anyway and the first update will use that small batch to determine the shape of the model. Such an example is shown in `10_custom_model.ipynb`.

However, it is far too convenient to use Keras' inbuilt methods for general use-cases such as classification and regression.

In [7]:
class InceptionCIFAR(tf.keras.Model):
    """Configurable stack of `InceptionBlock`s between a stem convolution and a softmax head.

    Args:
        num_layers: number of stages; each stage holds two `InceptionBlock`s,
            the first of which uses stride 2.
        num_classes: number of units of the final dense layer.
        initial_filters: filters of the stem convolution and of the first stage;
            the filter count is doubled after every stage.
        **kwargs: forwarded to `tf.keras.Model`.
    """

    def __init__(self, num_layers, num_classes, initial_filters=16, **kwargs):
        super(InceptionCIFAR, self).__init__(**kwargs)

        self.num_layers = num_layers
        self.initial_filters = initial_filters
        self.in_channels = initial_filters
        self.out_channels = initial_filters

        self.conv1 = ConvBNRelu(initial_filters)
        self.blocks = []

        # build all the blocks: every stage is a stride-2 block followed by a stride-1 block
        for stage in range(1, num_layers + 1):
            for position, stride in enumerate((2, 1), start=1):
                block = InceptionBlock(self.out_channels, strides=stride)
                self.in_channels = self.out_channels

                # "Register" the block by binding it to a named attribute of the
                # model. Without this, weights won't update.
                setattr(self, 'block_%d_%d' % (stage, position), block)
                self.blocks.append(block)

            self.out_channels *= 2

        self.avg_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(num_classes)

    def call(self, inputs, training=None, mask=None):
        """Return softmax class probabilities for `inputs`; `mask` is unused."""
        features = self.conv1(inputs, training=training)
        for block in self.blocks:
            features = block(features, training=training)

        logits = self.fc(self.avg_pool(features))

        # The softmax is pinned to the CPU; the original note here states that
        # the softmax op does not exist on the GPU.
        with tf.device('/cpu:0'):
            return tf.nn.softmax(logits)

In [8]:
# Use the first GPU when at least one is reported, otherwise fall back to the CPU.
device = '/gpu:0' if tfe.num_gpus() > 0 else '/cpu:0'

with tf.device(device):
    # build model and optimizer
    model = InceptionCIFAR(2, num_classes)
    optimizer = tf.train.AdamOptimizer(0.01)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Without this step, TF Keras tries to use the entire dataset to determine shapes in .fit().
    # Fix: pass exactly one sample-shaped input so the model can determine its input/output shapes.
    dummy_x = tf.zeros((1, image_size, image_size, 1))
    model._set_inputs(dummy_x)

    print("Number of variables in the model :", len(model.variables))
    model.summary()

    # train, validating on the test split after every epoch
    model.fit(x_train, y_train_ohe,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1,
              validation_data=(x_test, y_test_ohe))

    # evaluate on test set
    scores = model.evaluate(x_test, y_test_ohe, batch_size=batch_size, verbose=1)
    print("Final test loss and accuracy :", scores)

    # save the model variables as a checkpoint
    saver = tfe.Saver(model.variables)
    saver.save('weights/05_inception/weights.ckpt')

Number of variables in the model : 107
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_bn_relu_1 (ConvBNRelu)  multiple                  208       
_________________________________________________________________
inception_block_1 (Inception multiple                  7744      
_________________________________________________________________
inception_block_2 (Inception multiple                  23104     
_________________________________________________________________
inception_block_3 (Inception multiple                  50816     
_________________________________________________________________
inception_block_4 (Inception multiple                  91776     
_________________________________________________________________
global_average_pooling2d_1 ( multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple