### End to end train both networks

In [1]:
from keras.layers import Input, Dense, Activation, Lambda, Dropout, Concatenate, Reshape
from keras.models import Model, Sequential
from keras.callbacks import TensorBoard
from keras import optimizers
from keras import backend as K

import tensorflow as tf

from keras.regularizers import l2


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
### Hyperparameters

experiment_id = 0

learning_rate = 0.0001

epoch_list = [200, 20000, 10000, 4000]
batch_size_list = [1024, 1024, 128, 1024]

lambda_1 = 0.1       
lambda_2 = 0.005     

#################

epochs = 100 ###epoch_list[experiment_id]
batch_size = batch_size_list[experiment_id]

### Define compresion network

In [3]:
input_data = Input(batch_shape=(batch_size,120), name='input_placeholder')

encoded = Dense(60, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(input_data)
encoded = Dense(30, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(encoded)
encoded = Dense(10, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(encoded)

layer_lowdim = Dense(1, activation='linear', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001), name='lowdim')(encoded)

decoded = Dense(10, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(layer_lowdim)
decoded = Dense(30, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(decoded)
decoded = Dense(60, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(decoded)
decoded = Dense(120, activation='linear', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001), name='reconstructed')(decoded)



- Define similarity metrics

In [4]:
def cos_sim(a_b):
    '''
    a: batch x 120
    b: batch x 120 
    
    output: batch x 1
    '''
    a, b = a_b
    
    norm_a = K.sqrt(K.sum(a ** 2, axis=-1))
    norm_b = K.sqrt(K.sum(b ** 2, axis=-1))
    
    out = K.sum(a * b, axis=-1) / (norm_a * norm_b)
    out = K.reshape(out, [batch_size, 1])
    
    return out

def relative_euc_dist(a_b):
    '''
    a: batch x 120
    b: batch x 120 
    
    output: batch x 1
    '''
    a,b = a_b
    
    norm_diff = K.sqrt(K.sum((a - b)**2, axis=-1))
    norm_a = K.sqrt(K.sum(a ** 2, axis=-1))
    
    out = norm_diff / norm_a
    out = K.reshape(out, [batch_size, 1])

    return out

In [5]:
layer_cossim = Lambda(cos_sim,
                      name='cos_sim')([input_data, decoded])

In [6]:
layer_relativeEuc = Lambda(relative_euc_dist, 
                           name='relative_euc_dist')([input_data, decoded])

- Obtain "z"

In [7]:
def funct_concat(tensors):
    return K.concatenate(tensors)

layer_concat = Lambda(funct_concat, name="z")([layer_lowdim, layer_cossim, layer_relativeEuc])

### Define estimation network

In [8]:
input_est = layer_concat #Input(shape=(3,))(layer_concat)

est_layer = Dense(10, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(input_est)   ####(input_est)
est_layer = Dropout(0.5)(est_layer)
est_output = Dense(4, activation='softmax', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001), name='gamma')(est_layer)


### Build full network

In [9]:
full_network = Model(input=input_data, outputs=est_output)

  """Entry point for launching an IPython kernel.


In [10]:
full_network.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_placeholder (InputLayer)  (1024, 120)          0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (1024, 60)           7260        input_placeholder[0][0]          
__________________________________________________________________________________________________
dense_2 (Dense)                 (1024, 30)           1830        dense_1[0][0]                    
__________________________________________________________________________________________________
dense_3 (Dense)                 (1024, 10)           310         dense_2[0][0]                    
__________________________________________________________________________________________________
lowdim (De

### Load already saved dataset

In [12]:
import numpy as np

In [13]:
with np.load('../../datasets/kddcup/kdd99_train-randomState_None.npz') as data: 
    x_train = data["x_train"]
    y_train = data["y_train"]

In [14]:
x_train.shape

(198365, 120)

In [15]:
y_train.shape

(198365, 4)

### GMM parameters

- k: number of clusters (4 for here)
- N: batch size
- d: Dimension of latent vector z (3 here)

- gamma ($\gamma$) : membership predictions (softmax output of estimation net) [$N \times K$]
- phi ($\phi$): gaussian probabilities [$K$]
- mu ($\mu$): gaussian means [$K \times d$]
- sigma ($\Sigma$): gaussian covariances [$K \times d \times d$]

In [16]:
k = 4 
N = batch_size 
d = int(layer_concat.get_shape()[1])

- GMM parameters init

In [17]:
phi = tf.get_variable("phi",
                      shape=(k),
                      dtype=tf.float32,
                      initializer=tf.zeros_initializer(),
                      trainable=False)

mu = tf.get_variable("mu",
                      shape=(k,d),
                      dtype=tf.float32,
                      initializer=tf.zeros_initializer(), 
                      trainable=False)

sigma_init = np.repeat([np.eye(d, dtype=np.float32)], k, axis=0)

sigma = tf.get_variable("sigma",
                      shape=(k,d,d),
                      dtype=tf.float32,
                      initializer=tf.constant_initializer(sigma_init),
                      trainable=False)

In [18]:
tf_sess = K.get_session()

In [19]:
tf_sess.run([phi.initializer,
             mu.initializer, 
             sigma.initializer])

[None, None, None]

- Check for uninit vars

In [20]:
print(tf_sess.run(tf.report_uninitialized_variables()))

[]


In [21]:
def printGmmParams():
    print("#### GMM params ####")
    print("phi:\n",K.eval(phi),"\n")
    print("mu:\n",K.eval(mu),"\n")
    print("sigma:\n", K.eval(sigma),"\n")



In [22]:
def saveGmmParams(filename = "gmmParams.npz"):
    phi_ = K.eval(phi)
    mu_ = K.eval(mu)
    sigma_ = K.eval(sigma)
    np.savez_compressed(filename, phi=phi_, mu=mu_, sigma=sigma_)

In [23]:
printGmmParams()

#### GMM params ####
phi:
 [0. 0. 0. 0.] 

mu:
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]] 

sigma:
 [[[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]

 [[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]

 [[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]

 [[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]] 



### Total loss calculation

In [24]:
isVerbose_en = True
isVerbose_loss = True

- For the first time run of the totalLoss function

In [25]:
choices = np.random.choice(len(x_train), size=batch_size, replace=False)

batch_x_train = x_train[choices]
batch_y_train = y_train[choices]

batch_train = (batch_x_train, batch_y_train)

- Define energy function

In [26]:
def computeEnergy(z_i):
    """
    compute E(z_i) in loss function
    """
    
    #inside_sum = 0
    inside_sum = tf.zeros(()) 
    for cluster in range(k):
        diff = tf.reshape(z_i - mu[cluster], (1,-1))   ### (1,3)
        diff_t = tf.reshape(diff, (-1,1)) #diff.reshape(-1,1)   ### (3,1)

        sigma_inv = tf.linalg.inv(sigma[cluster]) ### (3,3)
        
        exp_term = tf.exp(-0.5 * tf.matmul(diff, tf.matmul(sigma_inv, diff_t)))    ### (1,1)

        denom = tf.sqrt(tf.linalg.det(2 * np.pi * sigma[cluster]))
        
        inside_sum += phi[cluster] * (exp_term / denom)   ### (1,1)
        
        
    inside_sum = tf.reshape(inside_sum, ())
    sample_energy = -tf.log(inside_sum + 1e-6, name="sample_energy")
    
        
    ### flatten inside_sum and return log of it
    return sample_energy

- Define total loss

In [27]:
def totalLoss(yTrue, yPred):
    ### autoencoder loss
    autoenc_loss = tf.reduce_sum(((input_data - decoded)**2), axis=1)    ### (N,)
    autoenc_loss = tf.reduce_mean(autoenc_loss, axis=0)      #### mean over all N in batch
    
    ### obtain z and gamma for current batch
    z = layer_concat
    gamma = est_output
    
    
    ########### gmm update #################
    gamma = est_output ### + 1e-6
    z = layer_concat         

    ### update list
    updates_gmm = []

    ################### phi #################
    update_phi = tf.assign(phi, 
                           tf.reduce_sum(gamma/batch_size, axis=0), 
                           name="update_phi")
    updates_gmm.append(update_phi)
    #######################################

    ################## mu ################
    for cluster in range(k):
        ### get the corresponding column of predictions
        gamma_cluster = tf.reshape(gamma[:,cluster], (-1,1))    ### (N x 1)

        ### duplicate column d times
        gamma_cluster_tile = tf.tile(gamma_cluster, (1,d))   ### (N x d)

        ### sum over all batch and divide
        matmul = tf.matmul(z, gamma_cluster_tile, transpose_a=True)   ### (dxd)

        result = tf.reduce_sum(matmul, axis=0, name="mu_matmul_red") / tf.reduce_sum(gamma[:,cluster], axis = 0, name="mu_gamma_red")

        update_mu = tf.assign(mu[cluster], 
                              result,
                              name="update_mu"+str(cluster))
        updates_gmm.append(update_mu)
    #########################################

    ################ sigma ###############
    for cluster in range(k):
        ### expand gamma for each sample
        gamma_cluster = tf.reshape(gamma[:,cluster], (-1,1))    ### (N x 1)
        gamma_cluster_expand = tf.expand_dims(gamma_cluster, 1) ### (N x 1 x 1)


        #### calculating diff
        ## expand mu and z
        ######## TODO: race condition??? (get mu after its updated value (auto??))
        with tf.control_dependencies(updates_gmm):
            mu_cluster = tf.reshape(mu[cluster], (1,-1))   ### (1 x d)

        mu_expand = tf.expand_dims(mu_cluster, 1)      ### (1 x 1 x 3)
        mu_expand_tile = tf.tile(mu_expand, tf.stack([N, 1, 1]))   ### (N x 1 x d)

        z_expand = tf.expand_dims(z, 1)    ### (N x 1 x d)

        diff = z_expand - mu_expand_tile   ### (N x 1 x d)

        ### matmul in the upper part
        matmul = tf.matmul(diff, diff, transpose_a=True)  ### (N x d x d)


        ### nominator
        nom = gamma_cluster_expand * matmul   ### (N x d x d)
        nom_reduced = tf.reduce_sum(nom, axis=0) ### (d x d)


        ### denominator
        denom = tf.reduce_sum(gamma_cluster)  ### single value, zero dim

        update_sigma = tf.assign(sigma[cluster], 
                                 nom_reduced/denom, 
                                 name ="update_sigma"+str(cluster))
        updates_gmm.append(update_sigma)

    
    
    ### dependency control
    with tf.control_dependencies(updates_gmm):  
        ### sample energy   
        sample_en_batch = tf.map_fn(lambda z_i: computeEnergy(z_i), z)
        sample_en = tf.reduce_mean(sample_en_batch, axis=0)
        sample_en *= lambda_1

        p = tf.reduce_sum(1 / tf.matrix_diag_part(sigma))
        p *= lambda_2

    ### total loss
    total_loss = autoenc_loss + sample_en + p

    return total_loss  


In [28]:
### load previos weights
### full_network.load_weights("modelsave_weights-epochs5.h5")

In [29]:
### keras

adam = optimizers.adam(lr=learning_rate, clipnorm=1., clipvalue=0.5) 

full_network.compile(optimizer=adam, loss=totalLoss) 

In [30]:
### Timestamp
from datetime import datetime
import os

timestamp = datetime.now().strftime("%Y-%m-%d_%H:%M")
print(timestamp, "")

directory = '../../models/kddcup/'+timestamp
if not os.path.exists(directory):
    os.makedirs(directory)

2018-07-29_19:10 


- Batch generator for training

In [31]:
### TODO: optimization??

def batchGenerator():
    '''
    return: number of batch_size examples in each run
    '''
    
    while True:
        choices = np.random.choice(len(x_train), size=batch_size, replace=False)
        
        batch_x_train = x_train[choices]
        batch_y_train = y_train[choices]
        
        
        yield (batch_x_train, batch_y_train) 

- Training using fit_generator

In [32]:
steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))

In [33]:
history = full_network.fit_generator(batchGenerator(),
                                     epochs = epochs,
                                     steps_per_epoch = steps_per_epoch,
                                     verbose = 1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


- Model save

In [37]:

full_network.save_weights('../../models/kddcup/{}/'.format(timestamp)+timestamp+"-modelsave_weights-epochs{}.h5".format(epochs), overwrite=True)

In [38]:
full_network.save('../../models/kddcup/{}/'.format(timestamp)+timestamp+"-modelsave-epochs{}.h5".format(epochs), overwrite=True)

- Save history

In [39]:
history_np = np.asarray(history.history)

In [40]:
np.savez_compressed('../../models/kddcup/{}/'.format(timestamp)+timestamp + "-history.npz", history = history_np)

- Last gmm params

In [41]:
printGmmParams()

#### GMM params ####
phi:
 [0.25 0.25 0.25 0.25] 

mu:
 [[1.1386565 1.1386565 1.1386565]
 [1.1386565 1.1386565 1.1386565]
 [1.1386565 1.1386565 1.1386565]
 [1.1386565 1.1386565 1.1386565]] 

sigma:
 [[[1.2112758  0.1714626  1.1308527 ]
  [0.1714626  0.02834561 0.15191464]
  [1.1308527  0.15191464 1.0722297 ]]

 [[1.2112758  0.1714626  1.1308527 ]
  [0.1714626  0.02834561 0.15191464]
  [1.1308527  0.15191464 1.0722297 ]]

 [[1.2112758  0.1714626  1.1308527 ]
  [0.1714626  0.02834561 0.15191464]
  [1.1308527  0.15191464 1.0722297 ]]

 [[1.2112758  0.1714626  1.1308527 ]
  [0.1714626  0.02834561 0.15191464]
  [1.1308527  0.15191464 1.0722297 ]]] 



- Gmm save

In [42]:
saveGmmParams(filename= '../../models/kddcup/{}/'.format(timestamp)+timestamp + "-gmmParams.npz")

In [43]:
print(timestamp)

2018-07-29_19:10
