### End to end train both networks

In [125]:
from keras.layers import Input, Dense, Activation, Lambda, Dropout, Concatenate, Reshape
from keras.models import Model, Sequential
from keras.callbacks import TensorBoard
from keras import optimizers
from keras import backend as K

import tensorflow as tf

from keras.regularizers import l2


In [130]:
### Hyperparameters

experiment_id = 0

learning_rate = 0.0001

epoch_list = [200, 20000, 10000, 4000]
batch_size_list = [1024, 1024, 128, 1024]

lambda_1 = 0.1       
lambda_2 = 0.005     

#################

epochs = epoch_list[experiment_id]
batch_size = batch_size_list[experiment_id]

### Define compresion network

In [131]:
input_data = Input(batch_shape=(batch_size,21), name='input_placeholder')

encoded = Dense(18, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(input_data)
encoded = Dense(12, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(encoded)
encoded = Dense(4, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(encoded)
# encoded = Dense(1, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(encoded)

layer_lowdim = Dense(1, activation='linear', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001), name='lowdim')(encoded)

decoded = Dense(4, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(layer_lowdim)
decoded = Dense(12, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(decoded)
decoded = Dense(18, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(decoded)
decoded = Dense(21, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(decoded)
# decoded = Dense(120, activation='linear', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001), name='reconstructed')(decoded)



- Define similarity metrics

In [132]:
def cos_sim(a_b):
    '''
    a: batch x 120
    b: batch x 120 
    
    output: batch x 1
    '''
    a, b = a_b
    
    norm_a = K.sqrt(K.sum(a ** 2, axis=-1))
    norm_b = K.sqrt(K.sum(b ** 2, axis=-1))
    
    out = K.sum(a * b, axis=-1) / (norm_a * norm_b)
    out = K.reshape(out, [batch_size, 1])
    
    return out

def relative_euc_dist(a_b):
    '''
    a: batch x 120
    b: batch x 120 
    
    output: batch x 1
    '''
    a,b = a_b
    
    norm_diff = K.sqrt(K.sum((a - b)**2, axis=-1))
    norm_a = K.sqrt(K.sum(a ** 2, axis=-1))
    
    out = norm_diff / norm_a
    out = K.reshape(out, [batch_size, 1])

    return out

In [133]:
layer_cossim = Lambda(cos_sim,
                      name='cos_sim')([input_data, decoded])

In [134]:
layer_relativeEuc = Lambda(relative_euc_dist, 
                           name='relative_euc_dist')([input_data, decoded])

- Obtain "z"

In [135]:
def funct_concat(tensors):
    return K.concatenate(tensors)

layer_concat = Lambda(funct_concat, name="z")([layer_lowdim, layer_cossim, layer_relativeEuc])

### Define estimation network

In [136]:
input_est = layer_concat #Input(shape=(3,))(layer_concat)

est_layer = Dense(10, activation='tanh', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001))(input_est)   ####(input_est)
est_layer = Dropout(0.5)(est_layer)
est_output = Dense(2, activation='softmax', kernel_regularizer=l2(0.00001), bias_regularizer=l2(0.00001), name='gamma')(est_layer)


### Build full network

In [137]:
full_network = Model(input=input_data, outputs=est_output)

  """Entry point for launching an IPython kernel.


In [138]:
full_network.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_placeholder (InputLayer)  (1024, 21)           0                                            
__________________________________________________________________________________________________
dense_34 (Dense)                (1024, 18)           396         input_placeholder[0][0]          
__________________________________________________________________________________________________
dense_35 (Dense)                (1024, 12)           228         dense_34[0][0]                   
__________________________________________________________________________________________________
dense_36 (Dense)                (1024, 4)            52          dense_35[0][0]                   
__________________________________________________________________________________________________
lowdim (De

### Load already saved dataset

In [139]:
import numpy as np

In [141]:
with np.load('thyroid_train-randomState_None.npz') as data: 
    x_train = data["x_train"]
    y_train = data["y_train"]
    
# labels removed
x_train = x_train[:, 0:-2]
print(x_train)

[[0.5        0.         0.         ... 0.29707113 0.5        0.22138837]
 [0.58333333 0.         0.         ... 0.19832636 0.39351852 0.18230144]
 [0.60416667 0.         0.         ... 0.18189121 0.37962963 0.17146654]
 ...
 [0.76041667 0.         0.         ... 0.09958159 0.28240741 0.12132583]
 [0.15625    1.         1.         ... 0.23012552 0.33333333 0.24171357]
 [0.65625    0.         0.         ... 0.16317992 0.31018519 0.18230144]]


In [142]:
x_train.shape

(3306, 21)

In [143]:
y_train.shape

(3306, 2)

### GMM parameters

- k: number of clusters (4 for here)
- N: batch size
- d: Dimension of latent vector z (3 here)

- gamma ($\gamma$) : membership predictions (softmax output of estimation net) [$N \times K$]
- phi ($\phi$): gaussian probabilities [$K$]
- mu ($\mu$): gaussian means [$K \times d$]
- sigma ($\Sigma$): gaussian covariances [$K \times d \times d$]

In [144]:
k = 2 
N = batch_size 
d = int(layer_concat.get_shape()[1])

- GMM parameters init

In [145]:
phi = tf.get_variable("phi",
                      shape=(k),
                      dtype=tf.float32,
                      initializer=tf.zeros_initializer(),
                      trainable=False)

mu = tf.get_variable("mu",
                      shape=(k,d),
                      dtype=tf.float32,
                      initializer=tf.zeros_initializer(), 
                      trainable=False)

sigma_init = np.repeat([np.eye(d, dtype=np.float32)], k, axis=0)

sigma = tf.get_variable("sigma",
                      shape=(k,d,d),
                      dtype=tf.float32,
                      initializer=tf.constant_initializer(sigma_init),
                      trainable=False)

ValueError: Variable phi already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "<ipython-input-22-dd8806924096>", line 5, in <module>
    trainable=False)
  File "/Users/jyotirmaysenapati/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/Users/jyotirmaysenapati/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):


In [146]:
tf_sess = K.get_session()

In [147]:
tf_sess.run([phi.initializer,
             mu.initializer, 
             sigma.initializer])

[None, None, None]

- Check for uninit vars

In [148]:
print(tf_sess.run(tf.report_uninitialized_variables()))

[]


In [149]:
def printGmmParams():
    print("#### GMM params ####")
    print("phi:\n",K.eval(phi),"\n")
    print("mu:\n",K.eval(mu),"\n")
    print("sigma:\n", K.eval(sigma),"\n")



In [150]:
def saveGmmParams(filename = "gmmParams.npz"):
    phi_ = K.eval(phi)
    mu_ = K.eval(mu)
    sigma_ = K.eval(sigma)
    np.savez_compressed(filename, phi=phi_, mu=mu_, sigma=sigma_)

In [151]:
printGmmParams()

#### GMM params ####
phi:
 [0. 0.] 

mu:
 [[0. 0. 0.]
 [0. 0. 0.]] 

sigma:
 [[[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]

 [[1. 0. 0.]
  [0. 1. 0.]
  [0. 0. 1.]]] 



### Total loss calculation

In [152]:
isVerbose_en = True
isVerbose_loss = True

- For the first time run of the totalLoss function

In [153]:
choices = np.random.choice(len(x_train), size=batch_size, replace=False)

batch_x_train = x_train[choices]
batch_y_train = y_train[choices]

batch_train = (batch_x_train, batch_y_train)

- Define energy function

In [154]:
def computeEnergy(z_i):
    """
    compute E(z_i) in loss function
    """
    
    #inside_sum = 0
    inside_sum = tf.zeros(()) 
    for cluster in range(k):
        diff = tf.reshape(z_i - mu[cluster], (1,-1))   ### (1,3)
        diff_t = tf.reshape(diff, (-1,1)) #diff.reshape(-1,1)   ### (3,1)

        sigma_inv = tf.linalg.inv(sigma[cluster]) ### (3,3)
        
        exp_term = tf.exp(-0.5 * tf.matmul(diff, tf.matmul(sigma_inv, diff_t)))    ### (1,1)

        denom = tf.sqrt(tf.linalg.det(2 * np.pi * sigma[cluster]))
        
        inside_sum += phi[cluster] * (exp_term / denom)   ### (1,1)
        
        
    inside_sum = tf.reshape(inside_sum, ())
    sample_energy = -tf.log(inside_sum + 1e-6, name="sample_energy")
    
        
    ### flatten inside_sum and return log of it
    return sample_energy

- Define total loss

In [155]:
def totalLoss(yTrue, yPred):
    ### autoencoder loss
    autoenc_loss = tf.reduce_sum(((input_data - decoded)**2), axis=1)    ### (N,)
    autoenc_loss = tf.reduce_mean(autoenc_loss, axis=0)      #### mean over all N in batch
    
    ### obtain z and gamma for current batch
    z = layer_concat
    gamma = est_output
    
    
    ########### gmm update #################
    gamma = est_output ### + 1e-6
    z = layer_concat         

    ### update list
    updates_gmm = []

    ################### phi #################
    update_phi = tf.assign(phi, 
                           tf.reduce_sum(gamma/batch_size, axis=0), 
                           name="update_phi")
    updates_gmm.append(update_phi)
    #######################################

    ################## mu ################
    for cluster in range(k):
        ### get the corresponding column of predictions
        gamma_cluster = tf.reshape(gamma[:,cluster], (-1,1))    ### (N x 1)

        ### duplicate column d times
        gamma_cluster_tile = tf.tile(gamma_cluster, (1,d))   ### (N x d)

        ### sum over all batch and divide
        matmul = tf.matmul(z, gamma_cluster_tile, transpose_a=True)   ### (dxd)

        result = tf.reduce_sum(matmul, axis=0, name="mu_matmul_red") / tf.reduce_sum(gamma[:,cluster], axis = 0, name="mu_gamma_red")

        update_mu = tf.assign(mu[cluster], 
                              result,
                              name="update_mu"+str(cluster))
        updates_gmm.append(update_mu)
    #########################################

    ################ sigma ###############
    for cluster in range(k):
        ### expand gamma for each sample
        gamma_cluster = tf.reshape(gamma[:,cluster], (-1,1))    ### (N x 1)
        gamma_cluster_expand = tf.expand_dims(gamma_cluster, 1) ### (N x 1 x 1)


        #### calculating diff
        ## expand mu and z
        ######## TODO: race condition??? (get mu after its updated value (auto??))
        with tf.control_dependencies(updates_gmm):
            mu_cluster = tf.reshape(mu[cluster], (1,-1))   ### (1 x d)

        mu_expand = tf.expand_dims(mu_cluster, 1)      ### (1 x 1 x 3)
        mu_expand_tile = tf.tile(mu_expand, tf.stack([N, 1, 1]))   ### (N x 1 x d)

        z_expand = tf.expand_dims(z, 1)    ### (N x 1 x d)

        diff = z_expand - mu_expand_tile   ### (N x 1 x d)

        ### matmul in the upper part
        matmul = tf.matmul(diff, diff, transpose_a=True)  ### (N x d x d)


        ### nominator
        nom = gamma_cluster_expand * matmul   ### (N x d x d)
        nom_reduced = tf.reduce_sum(nom, axis=0) ### (d x d)


        ### denominator
        denom = tf.reduce_sum(gamma_cluster)  ### single value, zero dim

        update_sigma = tf.assign(sigma[cluster], 
                                 nom_reduced/denom, 
                                 name ="update_sigma"+str(cluster))
        updates_gmm.append(update_sigma)

    
    
    ### dependency control
    with tf.control_dependencies(updates_gmm):  
        ### sample energy   
        sample_en_batch = tf.map_fn(lambda z_i: computeEnergy(z_i), z)
        sample_en = tf.reduce_mean(sample_en_batch, axis=0)
        sample_en *= lambda_1

        p = tf.reduce_sum(1 / tf.matrix_diag_part(sigma))
        p *= lambda_2

    ### total loss
    total_loss = autoenc_loss + sample_en + p

    return total_loss  


In [156]:
### load previos weights
### full_network.load_weights("modelsave_weights-epochs5.h5")

In [157]:
### keras

adam = optimizers.adam(lr=learning_rate, clipnorm=1., clipvalue=0.5) 

full_network.compile(optimizer=adam, loss=totalLoss) 

In [158]:
### Timestamp
from datetime import datetime
timestamp = datetime.now().strftime("%Y-%m-%d_%H:%M")
print(timestamp, "")

2018-07-28_14:39 


- Batch generator for training

In [159]:
### TODO: optimization??

def batchGenerator():
    '''
    return: number of batch_size examples in each run
    '''
    
    while True:
        choices = np.random.choice(len(x_train), size=batch_size, replace=False)
        
        batch_x_train = x_train[choices]
        batch_y_train = y_train[choices]
        
        
        yield (batch_x_train, batch_y_train) 

- Training using fit_generator

In [160]:
steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))

In [161]:
history = full_network.fit_generator(batchGenerator(),
                                     epochs = epochs,
                                     steps_per_epoch = steps_per_epoch,
                                     verbose = 1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 

- Model save

In [162]:

full_network.save_weights(timestamp+"-modelsave_weights-epochs{}.h5".format(epochs), overwrite=True)

In [163]:
full_network.save(timestamp+"-modelsave-epochs{}.h5".format(epochs), overwrite=True)

- Save history

In [164]:
history_np = np.asarray(history.history)

In [165]:
np.savez_compressed(timestamp + "-history.npz", history = history_np)

- Last gmm params

In [166]:
printGmmParams()

#### GMM params ####
phi:
 [0.4994552  0.50054485] 

mu:
 [[1.410799  1.410799  1.410799 ]
 [1.4095881 1.4095881 1.4095881]] 

sigma:
 [[[2.0660095  1.004974   0.98583096]
  [1.004974   0.4976533  0.47199622]
  [0.98583096 0.47199622 0.4769822 ]]

 [[2.063212   1.0108837  0.97651047]
  [1.0108837  0.5045837  0.47049367]
  [0.97651047 0.47049367 0.469099  ]]] 



- Gmm save

In [167]:
saveGmmParams(filename= timestamp + "-gmmParams.npz")