In [None]:
import keras.backend as K

from keras.regularizers import l1_l2
from keras.models import Model
from keras.callbacks import TensorBoard, ReduceLROnPlateau
from tensorflow.python import debug as tf_debug
import keras.layers as layers

import numpy as np
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import numpy as np
from keras.datasets import mnist

from abyss_deep_learning.keras.tensorboard import ImprovedTensorBoard, procuce_embeddings_tsv
from abyss_deep_learning.keras.metrics import mpca_factory, auc_factory

# NOTE: the TensorFlow debugger does not work well with Jupyter/Docker.
# Replace the address below with your computer's name or with localhost.
debug_session = tf_debug.TensorBoardDebugWrapperSession(
    K.get_session(), "herbicide_dl:7003")
K.set_session(debug_session)

In [None]:
##### Setup data #####
# Load MNIST, pick a class-balanced validation subset of the test split, rescale the
# images (0..255 maps onto [-1, 1]), one-hot encode the labels and shuffle the subset.
val_data_samples_per_class = 10


(x_train, y_train), (x_test, y_test) = mnist.load_data()
val_data = (x_test, y_test)
# Row indices into the test split: `val_data_samples_per_class` images drawn without
# replacement for every distinct label. This needs the integer labels, so it has to
# run before the one-hot encoding below.
mask = np.concatenate([
    np.random.choice(
        np.flatnonzero(y_test == digit), val_data_samples_per_class, replace=False)
    for digit in np.unique(val_data[1])])

# Append a trailing channel axis and rescale.
x_train = np.expand_dims(x_train, -1) / 127.5 - 1
x_test = np.expand_dims(x_test, -1) / 127.5 - 1

# One-hot encode the labels: row `k` of the 10x10 identity matrix encodes class `k`.
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

# Shuffle the selected validation rows so that they are no longer grouped by class.
idx = np.arange(len(mask))
np.random.shuffle(idx)
val_data = tuple(split[mask[idx]] for split in (x_test, y_test))

print("val_data has", val_data[1].shape, "shape")

In [None]:
########## Model Construction Functions ###########

def tensor_size(x):
    """Return the product of all static dimensions of `x` except the first one.

    `x` must expose ``x.shape.as_list()``; the leading dimension (presumably the
    batch axis) is skipped.
    """
    per_sample_dims = x.shape.as_list()[1:]
    return np.prod(per_sample_dims)

def make_backbone(x, num_layers, l1, l2):
    """Stack `num_layers` conv/ReLU stages on `x` and globally average-pool the result.

    Stage ``i`` is a 3x3, stride-1 ``Conv2D`` named ``conv{i}`` with ``64 * 2**i``
    filters, followed by a ReLU and an L1 activity regularizer.

    Args:
        x: input tensor (must expose ``x.shape.as_list()``, see `tensor_size`).
        num_layers: number of conv stages to stack.
        l1: activity (L1) regularization strength; divided by the size of the
            activation it is applied to.
        l2: kernel (L2) regularization strength; divided by the size of the tensor
            entering the convolution.

    Returns:
        The output tensor of the global average pooling layer named ``'features'``.
    """
    for i in range(num_layers):
        name = 'conv{:d}'.format(i)
        x = layers.Conv2D(
            64 * 2 ** i, 3,
            strides=1, kernel_regularizer=l1_l2(0, l2 / tensor_size(x)),
            # The name was previously computed but never applied to the layer.
            name=name,
        )(x)
        x = layers.Activation('relu')(x)
        x = layers.ActivityRegularization(l1 / tensor_size(x))(x)
    x = layers.GlobalAveragePooling2D(name='features')(x)
    return x

def make_head(x):
    """Attach the classifier head to `x`: dropout (rate 0.5), then a 10-unit softmax.

    The dense layer is named ``'predictions'``; its output tensor is returned.
    """
    dropout = layers.Dropout(0.5)
    classifier = layers.Dense(10, activation='softmax', name='predictions')
    return classifier(dropout(x))

########### Scalar Summary Functions ##########
# these are also defined in abyss_deep_learning.keras.tensorboard, but shown here for demo purposes.

def kernel_sparsity(model, min_value=1e-6):
    """Fraction of trainable weight elements that are (numerically) zero.

    An element counts as zero when its magnitude is not greater than `min_value`.

    Args:
        model: object exposing ``trainable_weights`` (an iterable of tensors).
        min_value: magnitude threshold above which an element counts as non-zero.

    Returns:
        A float32 tensor of shape ``(1,)`` holding ``zero elements / all elements``.
    """
    num = tf.zeros(1)
    den = tf.zeros(1)
    for weight in model.trainable_weights:
        size = tf.cast(tf.size(weight), tf.float32)
        # Compare magnitudes: without abs() every negative weight, however large,
        # would be counted as a zero.
        non_zero = tf.count_nonzero(tf.greater(tf.abs(weight), min_value))
        num += size - tf.cast(non_zero, tf.float32)
        den += size
    return num / den

def avg_update_ratio(model, weight):
    """Build the tensor ``||grad|| * lr / ||weight||`` for a single weight.

    The gradient is that of ``model.total_loss`` with respect to `weight`, obtained
    from the model's optimizer; ``lr`` is the optimizer's learning-rate variable.
    """
    (gradient,) = model.optimizer.get_gradients(model.total_loss, [weight])
    scaled_gradient_norm = tf.norm(gradient) * model.optimizer.lr
    return scaled_gradient_norm / tf.norm(weight)

In [None]:
###### Setup vars #########
# Shape of a single input image as (height, width, channels).
shape = (28, 28, 1)
# Mini-batch size, shared by training and the TensorBoard callback.
batch_size = 32
# Directory that receives the TensorBoard output.
log_dir = '/tmp/test'

In [None]:
####### Construct the model #######
# Start from a fresh Keras graph/session.
# NOTE(review): clear_session() presumably also discards the TensorBoard debug
# wrapper session installed in the first cell -- confirm if the debugger is needed.
K.clear_session()

inputs = layers.Input(shape=shape)
backbone_features = make_backbone(inputs, num_layers=3, l1=1e-2, l2=1e-2)
class_probabilities = make_head(backbone_features)
model = Model(inputs, class_probabilities)

# Metrics reported during training, in addition to the loss.
training_metrics = [
    'categorical_accuracy',
    mpca_factory(num_classes=y_test.shape[1]),
    auc_factory('PR'),
    auc_factory('ROC'),
]
model.compile('nadam', loss='categorical_crossentropy', metrics=training_metrics)
model.summary()

In [None]:
####### Instantiate and save the callbacks list ######
# Kernel of the last layer of the model; used by the custom update/weight scalar.
predictions_kernel = model.layers[-1].trainable_weights[0]
# The TSV written at the end of this cell is also passed to the callback as metadata.
embeddings_metadata_path = log_dir + "/data_labels.tsv"

# Extra tensors handed to the callback's `scalars` argument.
scalar_summaries = {
    'learning_rate': model.optimizer.lr,
    'feature_sparsity': kernel_sparsity(model),
    'prediction_UW_ratio': avg_update_ratio(model, predictions_kernel)
}

# Handed to the callback's `groups` argument: chart title -> tag names / regexes.
chart_groups = {'performance': {
    'loss': ['loss', 'val_loss'],
    'accuracy': [r'.*accuracy.*'],
    'Mean Per-Class Average Accuracy': [r'.*mpca.*'],
    'Mean Avg Precision': [r'.*PR.*'],
    'ROC AUC': [r'.*ROC.*']
}}

tensorboard_callback = ImprovedTensorBoard(
    log_dir=log_dir,
    histogram_freq=5,
    batch_size=batch_size,
    scalars=scalar_summaries,
    groups=chart_groups,
    pr_curve=True,
    num_classes=val_data[1].shape[1],
    write_graph=True,
    write_grads=True,
    write_images=False,
    embeddings_freq=10,
    embeddings_layer_names=['predictions', 'features'],
    embeddings_metadata=embeddings_metadata_path,
    embeddings_data=val_data[0],
    # Currently disabled options:
    #     val_size=len(x_test),
    #     img_path='/tmp/test/mnist_10k_sprite.png', img_size=(28, 28)
)
plateau_callback = ReduceLROnPlateau(patience=3, factor=0.5, min_delta=0.0, verbose=1)
callbacks = [tensorboard_callback, plateau_callback]

# Write the embeddings metadata: one row per validation sample with the columns
# (class index + 1, class index).
y = np.argmax(val_data[1], axis=1)
procuce_embeddings_tsv(
    embeddings_metadata_path,
    headers=['label', 'text'],
    labels=np.transpose(np.array([y + 1, y])).astype(int))

In [None]:
######## Train the model #######
# Training schedule: stage k runs for epochs[k] epochs at learning rate lrs[k].
epochs = [11]
lrs = [5e-3]
epoch = 0  # first epoch index of the next stage
print("val_data", val_data[1].shape)
for num_epochs, lr in zip(epochs, lrs):
    stage_end = epoch + num_epochs
    K.set_value(model.optimizer.lr, lr)
    # Only every 10th training sample is used.
    model.fit(
        x_train[::10], y_train[::10],
        batch_size=batch_size,
        epochs=stage_end,
        initial_epoch=epoch,
        validation_data=val_data,
        callbacks=callbacks,
    )
    epoch = stage_end

## Now go check out tensorboard!

# Check training is working
Calculate the update / weight ratio; it should be ~ 1e-3 if the model is training well.
Note the predictions/kernel value is shown as a custom scalar in tensorboard.

In [None]:
def model_update_ratios(model, batch_data):
    """Print and return the update-to-weight ratio of every weight in `model`.

    For each weight the ratio ``||grad|| * lr / ||weight||`` is computed, where the
    gradient is that of ``model.total_loss`` evaluated on one batch and ``lr`` is
    the optimizer's current learning rate.

    Args:
        model: compiled Keras model; ``layers``, ``optimizer``, ``total_loss``,
            ``inputs``, ``targets`` and ``sample_weights`` are read from it.
        batch_data: sequence of arrays fed, in order, to the tensors
            ``model.inputs + model.targets + model.sample_weights``.

    Returns:
        np.ndarray with one ratio per weight, in ``model.layers`` order (empty when
        the model has no weights).
    """
    weights = [weight
               for layer in model.layers if hasattr(layer, 'weights')
               for weight in layer.weights]
    if not weights:
        # Nothing to report; avoids asking the optimizer for zero gradients.
        return np.array([])

    lr = K.eval(model.optimizer.lr)
    grads = model.optimizer.get_gradients(model.total_loss, weights)
    tensors = (model.inputs + model.targets + model.sample_weights)
    feed_dict = dict(zip(tensors, batch_data))

    # Fetch gradients and weight values in a single session call instead of one
    # extra round-trip per weight.
    grad_values, weight_values = K.get_session().run(
        [grads, weights], feed_dict=feed_dict)

    stats = []
    for weight, value, grad in zip(weights, weight_values, grad_values):
        update_ratio = np.linalg.norm(grad) * lr / np.linalg.norm(value)
        stats.append(update_ratio)
        print("{:30s}{:.2e}".format(weight.name, update_ratio))
    return np.array(stats)

In [None]:
# One batch taken from the start of the test split, with a sample weight of 1 per row.
batch_val = [x_test[:batch_size], y_test[:batch_size], np.ones(batch_size)]
print("Update to weight ratios:")
stats = model_update_ratios(model, batch_val)