# SKETCHBOOK 
#### For the sparsity penalty implementation

Different implementations, using:
1. a class for the activity regulariser based on the Keras `Regularizer` class (*attribute problem* so far)
2. a function to apply that takes the activation layer as input (but *how to get the activation class?*)
3. a *split* model that gives an intermediate output to compute the mean over, and wraps the function defined above inside a Lambda layer

In [None]:
import numpy as np
from keras.layers import *
from keras.models import Model
from keras import backend as K
from keras import losses
from keras import regularizers

# using leaky relu?
from keras.layers.advanced_activations import PReLU

# using mnist

from keras.datasets import mnist
import numpy as np
# Load the MNIST images only; the labels are discarded.
(x_train, _), (x_test, _) = mnist.load_data()

# Scale pixels to [0, 1] as float32 and add a trailing channel axis
# (adapt the target shape if using `channels_first` image data format).
x_train = (x_train.astype('float32') / 255.).reshape((len(x_train), 28, 28, 1))
x_test = (x_test.astype('float32') / 255.).reshape((len(x_test), 28, 28, 1))


#x_train = np.random.rand(128,128,100).astype('float32')
#x_train = np.reshape(a=x_train,newshape=[100,128,128,1])
#p = K.constant(value=5e-2,shape=(1,1)) #kullback is expecting two tensors of the same shape
#print(kl.shape, beta.shape, p.shape)
#print(x_train.shape)

In [None]:
# compile this only for the split model
def penaltyTerm(yTrue,yPred):
    """Weighted KL sparsity penalty between the target `p` and `yPred`.

    `yTrue` is accepted only to match the Keras loss signature and is not
    used. Reads the module-level names `beta` (penalty weight) and `p`
    (target activation).
    NOTE(review): neither `beta` nor `p` is assigned in the visible,
    uncommented code -- confirm they are defined before this is called.
    """
    weight = beta
    divergence = losses.kullback_leibler_divergence(p, yPred)
    # hard coded divisor since predefined batch size
    return weight * (divergence / 100)

def customLoss(yTrue,yPred):
    """Return the reconstruction MSE plus the sparsity penalty of `penaltyTerm`."""
    reconstruction = losses.mean_squared_error(yTrue, yPred)
    sparsity = penaltyTerm(yTrue, yPred)
    return reconstruction + sparsity

In [None]:
# OPTION 1
# the KL divergence describes the penalty term to be applied to the loss function
def KL(p, p_hat):
    """Element-wise KL divergence between Bernoulli(p) and Bernoulli(p_hat).

    Computes ``p * log(p / p_hat) + (1 - p) * log((1 - p) / (1 - p_hat))``.

    Args:
        p: target mean activation (float or tensor), expected in (0, 1).
        p_hat: observed mean activation (float or tensor).

    Returns:
        The divergence, element-wise over the broadcast of `p` and `p_hat`;
        no reduction is applied here.
    """
    # Keep p_hat strictly inside (0, 1): a mean activation that is <= 0 or
    # >= 1 would otherwise produce log of a non-positive number or a
    # division by zero, i.e. NaN/inf in the loss.
    eps = K.epsilon()
    p_hat = K.clip(p_hat, eps, 1. - eps)
    return (p * K.log(p / p_hat)) + ((1 - p) * K.log((1 - p) / (1 - p_hat)))

class SparseReg(regularizers.Regularizer):
    """Activity regularizer that adds a KL-divergence sparsity penalty.

    The penalty is ``beta * KL(p, p_hat)``, where ``p_hat`` is the mean of
    all values of the tensor being regularized.

    Args:
        p: target mean activation.
        beta: weight of the sparsity penalty.
        p_hat: kept for backward compatibility only. The observed mean is
            always recomputed from the activations in ``__call__``, so this
            value never enters the penalty.
    """

    def __init__(self, p=0.05, beta=0.1,p_hat=0.0):
        self.p = K.cast_to_floatx(p)
        self.beta = K.cast_to_floatx(beta)
        self.p_hat = K.cast_to_floatx(p_hat)

    def __call__(self, x):
        """Return the scalar sparsity penalty for the activations `x`."""
        # Mean over every element of x (already a scalar, so no extra sum is
        # needed). It is held in a local rather than written to self.p_hat,
        # so the regularizer keeps no graph tensor as state between calls.
        p_hat = K.mean(x)

        # Clip into (0, 1) so the logs inside KL stay finite even when the
        # mean activation is non-positive or >= 1.
        eps = K.epsilon()
        p_hat = K.clip(p_hat, eps, 1. - eps)

        return self.beta * KL(self.p, p_hat)

    def get_config(self):
        """Return the constructor arguments needed to re-create the regularizer."""
        return {'p': float(self.p),
                'beta': float(self.beta)
               }

In [None]:
# OPTION 2
#define a custom sparse loss
# the KL divergence describes the penalty term to be applied to the loss function
def KL(p, p_hat):
    """Element-wise KL divergence between Bernoulli(p) and Bernoulli(p_hat).

    Computes ``p * log(p / p_hat) + (1 - p) * log((1 - p) / (1 - p_hat))``.

    Args:
        p: target mean activation (float or tensor), expected in (0, 1).
        p_hat: observed mean activation (float or tensor).

    Returns:
        The divergence, element-wise over the broadcast of `p` and `p_hat`;
        no reduction is applied here.
    """
    # Keep p_hat strictly inside (0, 1): a mean activation that is <= 0 or
    # >= 1 would otherwise produce log of a non-positive number or a
    # division by zero, i.e. NaN/inf in the loss.
    eps = K.epsilon()
    p_hat = K.clip(p_hat, eps, 1. - eps)
    return (p * K.log(p / p_hat)) + ((1 - p) * K.log((1 - p) / (1 - p_hat)))

# define a custom activity regularisation function
# obs: the function can be wrapped inside a lambda layer
def sparse_reg(x):
    """Weighted KL sparsity penalty computed from the activations `x`.

    The activations are averaged over axis 0 and compared against the target
    activation `p`; the element-wise divergence is scaled by `beta`.

    Args:
        x: activation tensor; axis 0 is treated as the batch axis.

    Returns:
        ``beta * KL(p, p_hat)`` with the shape of `x` minus axis 0. No sum is
        taken here, so the caller reduces it to a scalar if needed.
    """
    p = 0.05  # desired average activation of the hidden units
    beta = 0.1  # weight of sparsity penalty term

    # axis 0 batch_size, axis 1 layer size: average over the batch samples
    p_hat = K.mean(x, axis=0)

    # Clip into (0, 1) so the logs inside KL stay finite even when a unit's
    # mean activation is non-positive or >= 1.
    eps = K.epsilon()
    p_hat = K.clip(p_hat, eps, 1. - eps)

    # Apply the penalty weight, which was previously defined but never used.
    return beta * KL(p, p_hat)

In [None]:
# Define the model: one convolutional encoder layer and one convolutional
# decoder layer on 28x28x1 inputs.
# encoder: 16 filters of size 4x4, 'same' padding, with the SparseReg
# activity regularizer attached.
# NOTE(review): a layer instance (PReLU() here, LeakyReLU(0.03) in the decoder)
# is passed as `activation`; Keras appears to expect advanced activations to be
# used as separate layers -- confirm against the Keras documentation.
inp = Input(shape=(28,28,1))
lay = Conv2D(filters=16,kernel_size=(4,4),padding='same',
             activation=PReLU(),activity_regularizer=SparseReg(beta=5e-1,p=1e-2),name='encoder')(inp)

# computes the mean on top of the hidden layer
# obs: a Lambda layer is used to evaluate the sparse regularisation function
# (the two commented lines below are the sparse_reg-based alternative)
#layMean = Lambda(lambda x: sparse_reg(x),name='layMean')(lay)
#laySum = Lambda(lambda x: K.sum(x),name='laySum')(layMean)
# layMean is the mean over all values of the encoder output; it branches off
# `lay` and does not feed the decoder below.
layMean = Lambda(lambda x: K.mean(x),name='layMean')(lay)

# decoder: a single 4x4 filter with 'same' padding, applied to the encoder output
out = Conv2D(filters=1,kernel_size=(4,4),padding='same',activation=LeakyReLU(0.03) ,name='decoder')(lay)
#outMean = Lambda(lambda x: sparse_reg(x),output_shape=(1,1))(out)
#outSum = Lambda(lambda x: K.sum(x))(outMean)

In [None]:
# Create a model that uses the custom loss function
# obs: to use a specific (intermediate) output, the model should be split so
# that it exposes that output; here only `out` is a model output.
model = Model(inputs=inp,outputs=out,name='sparse_cae')

In [None]:
# define a custom loss function
def customLoss(yTrue,yPred):
    """Reconstruction MSE plus the summed KL divergence between `p` and `layMean`.

    Reads the module-level names `p` and `layMean` instead of deriving the
    sparsity term from its arguments.
    NOTE(review): `p` is not assigned in the visible, uncommented code --
    confirm it is defined before this loss is used.
    """
    reconstruction = losses.mean_squared_error(yTrue, yPred)
    divergence = losses.kullback_leibler_divergence(p, layMean)
    return reconstruction + K.sum(divergence)

# Compile the model with plain SGD.
# NOTE(review): the built-in 'mse' loss is passed here, so the customLoss
# function defined just above is not used by this compile call -- confirm
# whether loss=customLoss was intended.
model.compile(optimizer='sgd', loss='mse')

In [None]:
# Train the model for one epoch; x_train is both the input and the target
# (reconstruction).
# NOTE(review): batch_size is 128 here, while penaltyTerm divides by a
# hard-coded 100 -- confirm which batch size is intended.
model.fit(x_train, x_train, epochs=1, batch_size=128)

In [15]:
from keras.models import Model
from keras.layers import Dense, Input
# using prelu?
from keras.layers.advanced_activations import PReLU
    
# Dense model: 16 inputs -> 64 hidden units ('encoder') -> 2 outputs ('decoder')
inp = Input(shape=(16,))

# encoder: uniformly initialised dense layer with a PReLU activation
lay = Dense(
    units=64,
    kernel_initializer='uniform',
    activation=PReLU(),
    name='encoder',
)(inp)

# decoder: same configuration, projecting the hidden layer down to 2 units
out = Dense(
    units=2,
    kernel_initializer='uniform',
    activation=PReLU(),
    name='decoder',
)(lay)

# wrap the input/output tensors into a trainable model
model = Model(inputs=inp, outputs=out, name='cae')