In [None]:
from __future__ import print_function

#import os ; os.environ['KERAS_BACKEND']='tensorflow'

import keras
import keras.backend as K

import numpy as np
import scipy.optimize
import scipy.misc
from collections import OrderedDict, namedtuple

# Select a backend-specific identity-matrix constructor.
# K.backend() is the public accessor for the active backend name; the private
# module attribute K._BACKEND is an implementation detail and is not a stable name.
# tensor_eye is later called with a symbolic size (an int32 cast of a shape
# entry), which is presumably why the backend's own eye() is used directly.
if K.backend() == 'tensorflow':
    import tensorflow as tf
    def tensor_eye(size):
        """Return a size x size identity matrix as a TensorFlow tensor."""
        return tf.eye(size)
elif K.backend() == 'theano':
    import theano.tensor as T
    def tensor_eye(size):
        """Return a size x size identity matrix as a Theano tensor."""
        return T.eye(size)
else:
    raise Exception('Unknown backend')

    
def get_mnist():
    """Load MNIST via Keras and return a (train, test) pair of `Dataset` tuples.

    Each `Dataset` namedtuple carries:
      X          -- images flattened to one row per sample, float32, divided by
                    255 and then mapped through 2*x - 1
      Y          -- one-hot encoding of the labels (nb_classes columns)
      y          -- the integer labels exactly as returned by the loader
      nb_classes -- number of classes (10)
    """
    Dataset = namedtuple('Dataset',['X','Y','y','nb_classes'])
    num_classes = 10

    def _make_split(images, labels):
        # Flatten every image to a vector, scale by 1/255, then shift/scale with 2*x - 1.
        flat = np.reshape(images, [images.shape[0], -1]).astype('float32') / 255.
        flat = flat * 2.0 - 1.0
        onehot = keras.utils.np_utils.to_categorical(labels, num_classes)
        return Dataset(flat, onehot, labels, num_classes)

    train_raw, test_raw = keras.datasets.mnist.load_data()
    return _make_split(*train_raw), _make_split(*test_raw)

# Load both splits once; later cells (the Reporter callback, the model definition
# and model.fit) read `trn` and `tst` as module-level globals.
trn, tst = get_mnist()

In [None]:
# Mini-batch size handed to model.fit(batch_size=...) further down in this cell.
sgd_batchsize = 128

def Kget_dists(X):
    """Keras code to compute the pairwise squared-distance matrix for the
    set of vectors given by the rows of the matrix X.

    Uses the expansion |a - b|^2 = |a|^2 + |b|^2 - 2 a.b, so entry (i, j) of
    the result is the squared Euclidean distance between rows i and j.
    """
    row_sq_norms = K.sum(K.square(X), axis=1)
    norms_col = K.expand_dims(row_sq_norms, 1)   # column vector of |x_i|^2
    return norms_col + K.transpose(norms_col) - 2*K.dot(X, K.transpose(X))
    
def kde_entropy_from_dists_loo(dists, x, var, exponent=1.0):
    """Leave-one-out Gaussian-KDE entropy estimate built from pairwise distances.

    dists    -- pairwise distance matrix for the rows of x (e.g. Kget_dists(x))
    x        -- the sample tensor; only its shape (rows, columns) is read here
    var      -- variance of the Gaussian kernel
    exponent -- accepted for signature compatibility; not used by this estimator

    Returns a scalar tensor: minus the mean leave-one-out log-density.
    """
    n_samples = K.cast( K.shape(x)[0], K.floatx() )
    n_dims    = K.cast( K.shape(x)[1], K.floatx() )

    # A huge value on the diagonal makes each point's own kernel contribute
    # exp(-huge) ~ 0 to the log-sum-exp, i.e. the point is left out.
    self_mask = tensor_eye(K.cast(n_samples, 'int32')) * 10e20
    scaled_dists = (dists + self_mask) / (2*var)

    log_normalizer = (n_dims/2.0)*K.log(2*np.pi*var)
    log_density = K.logsumexp(-scaled_dists, axis=1) - K.log(n_samples-1) - log_normalizer
    return -K.mean(log_density)

def entropy_estimator(x, var, exponent=1.0):
    """Pairwise-distance entropy estimate for the rows of x under Gaussian kernels.

    x        -- sample tensor, one vector per row
    var      -- variance of the Gaussian kernel
    exponent -- multiplier on the scaled squared distances inside the
                log-sum-exp: 1.0 for the KL estimator, 0.25 for BD

    Returns a scalar tensor: dims/2 minus the mean log of the averaged kernels.
    """
    n_samples = K.cast( K.shape(x)[0], K.floatx() )
    n_dims    = K.cast( K.shape(x)[1], K.floatx() )

    scaled_dists = Kget_dists(x) / (2*var)
    log_normalizer = (n_dims/2.0)*K.log(2*np.pi*var)

    # The Gaussian normalizer is subtracted inside the log-sum-exp here.
    log_density = K.logsumexp(-exponent * scaled_dists - log_normalizer, axis=1) - K.log(n_samples)
    entropy = -K.mean(log_density)
    return n_dims/2 + entropy

def entropy_estimator2(x, var):
    """Pairwise-distance entropy estimate with the distance multiplier fixed at 1.

    Same formula as entropy_estimator without an `exponent` argument; the
    Gaussian normalizer is subtracted after the log-sum-exp rather than inside it.

    x   -- sample tensor, one vector per row
    var -- variance of the Gaussian kernel
    """
    n_samples = K.cast( K.shape(x)[0], K.floatx() )
    n_dims    = K.cast( K.shape(x)[1], K.floatx() )

    scaled_dists = Kget_dists(x) / (2*var)
    log_normalizer = (n_dims/2.0)*K.log(2*np.pi*var)

    log_density = K.logsumexp(-scaled_dists, axis=1) - K.log(n_samples) - log_normalizer
    entropy = -K.mean(log_density)
    return n_dims/2 + entropy

def wrapKfunc(f, data, targets):
    """Turn a compiled Keras function into a plain scalar function of log-variance.

    The returned callable takes `logvar` and calls f with the list
    [data, targets, all-ones sample weights, 1, exp(logvar)] -- the same order
    in which Reporter.on_train_begin assembles the function inputs -- then
    returns the first element of f's first output.
    """
    def evaluate_at(logvar):
        unit_weights = np.ones(len(data))
        feed = [data, targets, unit_weights, 1, np.exp(logvar)]
        first_output = f(feed)[0]
        return first_output.flat[0]
    return evaluate_at

class Reporter(keras.callbacks.Callback):
    """Keras callback that records entropy estimates of hidden-layer activity.

    Every `on_every` epochs, and for each monitored layer, the kernel
    log-variance is chosen by minimizing the leave-one-out KDE entropy; the
    two pairwise-distance estimators (exponent 1.0 and 0.25) are then evaluated
    at that log-variance. This is done on subsamples of the module-level `trn`
    and `tst` datasets.

    Attributes set during training:
      layerfuncs -- one [loo, upper, lower] list of compiled functions per layer
      saved_logs -- dict mapping epoch -> OrderedDict of the values reported
    """
    def __init__(self, on_every=1, *kargs, **kwargs):
        """on_every: report on epochs whose index is a multiple of this value."""
        super(Reporter, self).__init__(*kargs, **kwargs)
        self.on_every = on_every

    def on_train_begin(self, logs=None):
        """Compile, for each monitored layer, the three entropy functions."""
        self.layerfuncs = []
        # Scalar placeholder for the kernel variance; fed as the last input.
        var = K.placeholder(ndim=0)
        inputs = self.model.inputs + self.model.targets + self.model.sample_weights + [ K.learning_phase(),] + [var,]
        # Skips the first two layers and the last one; for the model built in this
        # notebook that presumably leaves the two 20-unit hidden layers.
        for l in self.model.layers[2:-1]:
            f = K.function(inputs, [kde_entropy_from_dists_loo(Kget_dists(l.output),l.output,var)])
            f2 = K.function(inputs, [entropy_estimator(l.output,var)])
            f3 = K.function(inputs, [entropy_estimator(l.output,var, 0.25)])
            self.layerfuncs.append([f, f2, f3])
        self.saved_logs = {}

    def on_epoch_end(self, epoch, logs=None):
        """Compute, print and store the entropy estimates for this epoch.

        Values are written into `logs` (when provided) and a copy is kept in
        self.saved_logs[epoch].
        """
        # A fresh dict per call: a mutable default would be shared and mutated
        # across calls because this method writes into `logs`.
        if logs is None:
            logs = {}
        if epoch % self.on_every != 0:
            return

        # (key prefix, inputs, targets); slice once instead of once per estimator.
        subsets = [('trn', trn.X[::20], trn.Y[::20]),
                   ('tst', tst.X[::10], tst.Y[::10])]

        l = OrderedDict()
        for lndx, (f_entropy_loo, f_entropy_upper, f_entropy_lower) in enumerate(self.layerfuncs):
            for prefix, X, Y in subsets:
                # Optimize the log-variance against the leave-one-out entropy ...
                r = scipy.optimize.minimize_scalar(wrapKfunc(f_entropy_loo, X, Y), method='brent')
                l['%s_layer_%d_h_loo' % (prefix, lndx)] = r.fun
                l['%s_layer_%d_logvar' % (prefix, lndx)] = r.x
                # ... then evaluate both pairwise estimators at that optimum.
                l['%s_layer_%d_h_upper' % (prefix, lndx)] = wrapKfunc(f_entropy_upper, X, Y)(r.x)
                l['%s_layer_%d_h_lower' % (prefix, lndx)] = wrapKfunc(f_entropy_lower, X, Y)(r.x)

        for k,v in l.items():
            print(k,"=",v)
            logs[k] = v

        self.saved_logs[epoch] = l.copy()
        
            
# Fully connected classifier: input -> Dense(1024) -> Dense(20) -> Dense(20) -> softmax.
input_layer = keras.layers.Input((trn.X.shape[1],))
hidden_output = input_layer
for layer_width in (1024, 20, 20):
    hidden_output = keras.layers.Dense(layer_width, activation='relu')(hidden_output)
outputs = keras.layers.Dense(trn.nb_classes, activation='softmax')(hidden_output)

model = keras.models.Model(inputs=input_layer, outputs=outputs)
optimizer = keras.optimizers.SGD(lr=0.001)  # optionally: momentum=0.9, nesterov=True
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Entropy estimates are computed on every 10th epoch.
reporter = Reporter(on_every=10)
# Disabled: lrcb = keras.callbacks.LearningRateScheduler(lrscheduler)

r = model.fit(
    x=trn.X,
    y=trn.Y,
    verbose=2,
    batch_size=sgd_batchsize,
    epochs=20000,
    validation_data=(tst.X, tst.Y),
    callbacks=[reporter,],  # disabled alternative: callbacks=[lrcb,]
)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# 2x2 grid: one row per dataset (trn, tst); the left panel shows the two
# pairwise-distance estimates, the right panel the leave-one-out estimate.
plt.figure(figsize=(15,10))
for rowndx, t in enumerate(['trn','tst']):
    # Left panel: H_KL (solid) and H_BD (dashed) for both monitored layers.
    plt.subplot(2, 2, rowndx*2 + 1)
    epochs = sorted(reporter.saved_logs.keys())

    for layer, color in [(0, 'r'), (1, 'b')]:
        for suffix, dash, symbol in [('h_upper', '', 'KL'), ('h_lower', '--', 'BD')]:
            key = '%s_layer_%d_%s' % (t, layer, suffix)
            plt.plot(epochs,
                     [reporter.saved_logs[epoch][key] for epoch in epochs],
                     color + dash,
                     label="%s $H_{%s}$(layer%d)" % (t, symbol, layer + 1))
    plt.legend(loc='lower right')
    plt.xlabel('Epochs')
    plt.ylabel('Layer Activity Entropy')

    # Right panel: leave-one-out entropy for both monitored layers.
    plt.subplot(2, 2, rowndx*2 + 2)
    for layer, color in [(0, 'r'), (1, 'b')]:
        key = '%s_layer_%d_h_loo' % (t, layer)
        plt.plot(epochs,
                 [reporter.saved_logs[epoch][key] for epoch in epochs],
                 color,
                 label="%s $H_{loo}$(layer%d)" % (t, layer + 1))

    plt.legend(loc='lower right')
    plt.xlabel('Epochs')
    plt.ylabel('Layer Activity Entropy')

plt.savefig('run_output.pdf')

In [None]:
# NOTE(review): `multivariate_normal` was not imported by any visible cell, so this
# cell raised NameError on a fresh kernel. It is assumed here to be
# scipy.stats.multivariate_normal (a frozen 2-D Gaussian with the given mean and
# covariance) -- confirm; numpy.random.multivariate_normal would instead draw a sample.
from scipy.stats import multivariate_normal

multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])

