# Simple variational autoencoder



In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split

from keras import backend as K
from keras import optimizers
from keras.layers import BatchNormalization as BN, Concatenate, Dense, Input, Lambda,Dropout
from keras.models import Model
from keras.losses import mean_squared_error, binary_crossentropy

In [4]:
# Directory holding the CLL multi-omics CSV exports.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_loc = "/media/sergiu/workpc/tmp/iomics/cll_data/"

# Load the two omics tables; the first CSV column becomes the row index.
df_meth = pd.read_csv(data_loc + "CLL_data_Methylation.csv", index_col=0)
df_mrna = pd.read_csv(data_loc + "CLL_data_mRNA.csv", index_col=0)

# Drop every column that contains at least one NaN within its own table.
df_mrna = df_mrna.dropna(axis='columns')
df_meth = df_meth.dropna(axis='columns')

# Stack the two tables row-wise (pd.concat defaults to axis=0 with an outer
# join on columns). A column missing from either table is filled with NaN and
# removed by the second dropna, so only columns present in both tables remain.
# NOTE(review): with this layout the rows of X are the observations that get
# split and fed to the model, and the columns are its input features —
# confirm that this orientation is intended.
X = pd.concat([df_mrna, df_meth])
X = X.dropna(axis='columns')
print(X.shape)

# Fixed random_state so the train/test partition is identical on every run.
X_train, X_test = train_test_split(X, random_state=42)

(9248, 135)


In [5]:
def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    The sample is computed as ``mean + exp(0.5 * log_var) * eps`` where
    ``eps`` is standard-normal noise with one value per batch row and
    latent dimension.

    # Arguments:
        args (sequence of two tensors): mean and log-variance of Q(z|X),
            in that order.
    # Returns:
        z (tensor): sampled latent vector
    """
    mu, log_var = args
    # Batch size is read dynamically; the latent width is the static
    # second dimension of the mean tensor.
    n_rows = K.shape(mu)[0]
    n_latent = K.int_shape(mu)[1]
    # K.random_normal defaults to mean=0 and std=1.0
    noise = K.random_normal(shape=(n_rows, n_latent))
    # exp(0.5 * log-variance) is the standard deviation
    sigma = K.exp(0.5 * log_var)
    return mu + sigma * noise

In [12]:

# Width of the input (and reconstruction) layer. Derived from the training
# matrix so the network always matches the data it is fitted on
# (135 columns for the data set loaded in this notebook).
input_size = X_train.shape[1]
# Activation of the hidden dense layers (alternatives, e.g. relu: https://keras.io/activations/)
act = "elu"
# Width of the intermediate dense layers; values considered: 128, 256, 512
ds = 128
# Latent space dimension; values considered: 16, 32, 64
ls = 16
# Dropout rate, in [0, 1]
dropout = 0.2
# Weight of the KL term in the loss; candidate values noted: 1, 10, 15, 25, 50, 100
beta = 1

# Seed NumPy and TensorFlow before any layer is created.
np.random.seed(42)
tf.random.set_seed(42)

# ---- Encoder: input -> dense + batch norm -> (z_mean, z_log_sigma, z) ----

# Single input holding one row of the concatenated data matrix
inputs = Input(name='concat_input', shape=(input_size,))

# Hidden representation
encoder_hidden = Dense(ds, activation=act)(inputs)
encoder_hidden = BN()(encoder_hidden)

# Two linear heads parameterize the latent distribution; the second head
# starts with an all-zero kernel. `sampling` draws z from the two heads.
z_mean = Dense(ls, name='z_mean')(encoder_hidden)
z_log_sigma = Dense(ls, name='z_log_sigma', kernel_initializer='zeros')(encoder_hidden)
latent_heads = [z_mean, z_log_sigma]
z = Lambda(sampling, name='z', output_shape=(ls,))(latent_heads)

# The encoder exposes both heads and the sampled z, in that order
encoder = Model(inputs=inputs, outputs=latent_heads + [z], name='encoder')
encoder.summary()


# ---- Decoder: latent vector -> dense + batch norm + dropout -> reconstruction ----

# Stand-alone input so the decoder can also be used on its own
latent_inputs = Input(name='z_sampling', shape=(ls,))

# Hidden block
x = Dense(ds, activation=act)(latent_inputs)
x = BN()(x)
x = Dropout(dropout)(x)

# Linear output layer with the same width as the encoder input
concat_out = Dense(input_size)(x)

decoder = Model(inputs=latent_inputs, outputs=concat_out, name='decoder')
decoder.summary()


# End-to-end model: only the sampled z (third encoder output) is decoded.
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')

# Define the loss
# KL term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var)) over the latent
# axis. Despite its name, z_log_sigma is used as a log-variance here (and in
# `sampling`, which takes exp(0.5 * value) as the standard deviation).
distance = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
distance = K.sum(distance, axis=-1)
distance *= -0.5
# NOTE(review): Keras' mean_squared_error averages over the feature axis while
# the KL term above is summed over the latent axis — confirm that this
# relative weighting (together with beta) is intended.
reconstruction_loss = mean_squared_error(inputs, outputs)
vae_loss = K.mean(reconstruction_loss + beta * distance)
# The loss is attached to the model itself, so compile() gets no `loss` argument.
vae.add_loss(vae_loss)

# `learning_rate` replaces the deprecated `lr` alias; all values are unchanged.
adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.001, amsgrad=False)
vae.compile(optimizer=adam)
vae.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
concat_input (InputLayer)       [(None, 135)]        0                                            
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 128)          17408       concat_input[0][0]               
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 128)          512         dense_3[0][0]                    
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 16)           2064        batch_normalization_2[0][0]      
____________________________________________________________________________________________

In [15]:
# Train the VAE, passing the same matrix as input and as target for both the
# training and the validation split. The returned History object is kept in
# `history`.
history = vae.fit(
    X_train,
    X_train,
    epochs=450,
    batch_size=256,
    shuffle=True,
    validation_data=(X_test, X_test),
)

Epoch 1/450
Epoch 2/450
Epoch 3/450
Epoch 4/450
Epoch 5/450
Epoch 6/450
Epoch 7/450
Epoch 8/450
Epoch 9/450
Epoch 10/450
Epoch 11/450
Epoch 12/450
Epoch 13/450
Epoch 14/450
Epoch 15/450
Epoch 16/450
Epoch 17/450
Epoch 18/450
Epoch 19/450
Epoch 20/450
Epoch 21/450
Epoch 22/450
Epoch 23/450
Epoch 24/450
Epoch 25/450
Epoch 26/450
Epoch 27/450
Epoch 28/450
Epoch 29/450
Epoch 30/450
Epoch 31/450
Epoch 32/450
Epoch 33/450
Epoch 34/450
Epoch 35/450
Epoch 36/450
Epoch 37/450
Epoch 38/450
Epoch 39/450
Epoch 40/450
Epoch 41/450
Epoch 42/450
Epoch 43/450
Epoch 44/450
Epoch 45/450
Epoch 46/450
Epoch 47/450
Epoch 48/450
Epoch 49/450
Epoch 50/450
Epoch 51/450
Epoch 52/450
Epoch 53/450
Epoch 54/450
Epoch 55/450
Epoch 56/450
Epoch 57/450
Epoch 58/450
Epoch 59/450
Epoch 60/450
Epoch 61/450
Epoch 62/450
Epoch 63/450
Epoch 64/450
Epoch 65/450
Epoch 66/450
Epoch 67/450
Epoch 68/450
Epoch 69/450
Epoch 70/450
Epoch 71/450
Epoch 72/450
Epoch 73/450
Epoch 74/450
Epoch 75/450
Epoch 76/450
Epoch 77/450
Epoch 78

In [16]:
# Write the trained VAE weights to an .h5 file in the working directory.
weights_path = './vae_cncvae.h5'
vae.save_weights(weights_path)