In [1]:
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.layers import BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mse
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import TerminateOnNaN

import tensorflow as tf
tf.compat.v1.disable_eager_execution()

import kerastuner
from kerastuner.tuners import Hyperband, BayesianOptimization

import numpy as np
import pandas as pd

print("GPUs Available: ", tf.config.list_physical_devices('GPU'), flush=True)
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
tf.config.set_visible_devices(physical_devices[1:], 'GPU')
#print("GPUs Available: ", tf.config.list_logical_devices('GPU'), flush=True)


2022-08-07 04:24:28.787086: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


GPUs Available:  []


2022-08-07 04:24:30.338036: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-07 04:24:30.339088: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-07 04:24:30.352313: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2022-08-07 04:24:30.352346: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 8557ee92613c
2022-08-07 04:24:30.352354: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 8557ee92613c
2022-08-07 04:24:30.352451: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.39.1
2022-08-07 04:24:30.352469: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 510.39.1
2022-08-07 04:24:30.352474: I tensorflow/stream_executor/cuda/cuda_diagnosti

In [2]:
data_path = 'data/BioBank.xlsx'

bb_train_data = pd.read_excel(data_path, sheet_name='Training Set', engine='openpyxl')
bb_test_data = pd.read_excel(data_path, sheet_name='Testing Set', engine='openpyxl')

biobank_data = pd.concat([bb_train_data, bb_test_data], ignore_index = True)

In [3]:
# Data & model configuration
batch_size = 256
no_epochs = 1000
latent_dim = 18

original_dim = bb_train_data.shape[1]
input_shape = (original_dim,)

In [4]:
# recommended to do this here: https://www.tensorflow.org/tutorials/keras/keras_tuner
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    def on_train_end(*args, **kwargs):
        IPython.display.clear_output(wait = True)
        
        
def model_builder(hp):
    # # =================
    # # Encoder
    # # =================

    # Definition
    i       = Input(shape=input_shape, name='encoder_input')
    
    x       = Dense(hp.Int('encoder_units',
                           min_value=30,
                           max_value=220,
                           step=10))(i)
    x       = LeakyReLU()(x)
    
    mu      = Dense(latent_dim, name='latent_mu')(x)
    sigma   = Dense(latent_dim, name='latent_sigma')(x)

    # Define sampling with reparameterization trick
    def sample_z(args):
        mu, sigma = args
        batch     = K.shape(mu)[0]
        dim       = K.int_shape(mu)[1]
        eps       = K.random_normal(shape=(batch, dim))
        return mu + K.exp(sigma / 2) * eps

    # Use reparameterization trick to ....??
    z       = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([mu, sigma])

    # Instantiate encoder
    encoder = Model(i, [mu, sigma, z], name='encoder')
    
    # =================
    # Decoder
    # =================

    # Definition
    d_i   = Input(shape=(latent_dim, ), name='decoder_input')
    
    x     = Dense(hp.Int('decoder_units',
                           min_value=20,
                           max_value=220,
                           step=10))(d_i)
    x     = LeakyReLU()(x)
        
    o     = Dense(original_dim)(x)

    # Instantiate decoder
    decoder = Model(d_i, o, name='decoder')
    
    # =================
    # VAE as a whole
    # =================

    # Define loss
    def kl_reconstruction_loss(true, pred):
      # Reconstruction loss
        reconstruction_loss = mse(true, pred)
        reconstruction_loss *= original_dim

        # KL divergence loss
        kl_loss = 1 + sigma - K.square(mu) - K.exp(sigma)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        
        # weight KL divergence loss here
        kl_loss *= hp.Float(
        'kl_beta',
        min_value=1e-3,
        max_value=1e1,
        sampling='LOG',
        default=1e-2
        )

        return K.mean(reconstruction_loss + kl_loss)

    # Instantiate VAE
    vae_outputs = decoder(encoder(i)[2])
    vae         = Model(i, vae_outputs, name='vae')


    # Define optimizer
    optimizer = Adam(hp.Float(
        'learning_rate',
        min_value=1e-4,
        max_value=1e-2,
        sampling='LOG',
        default=1e-3
    ), clipnorm=1.0)

    # Compile VAE
    vae.compile(optimizer=optimizer, loss=kl_reconstruction_loss, metrics = ['mse'], experimental_run_tf_function=False)
    
    return vae
    

In [5]:
# Set tuner parameters
tuner = Hyperband(
    model_builder,
    objective='mse',
    factor=2,
    max_epochs=200,
    directory='hyperband_optimization',
    project_name='mtvae',
    overwrite=True)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


In [6]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
encoder_units (Int)
{'default': None, 'conditions': [], 'min_value': 30, 'max_value': 220, 'step': 10, 'sampling': None}
decoder_units (Int)
{'default': None, 'conditions': [], 'min_value': 20, 'max_value': 220, 'step': 10, 'sampling': None}
learning_rate (Float)
{'default': 0.001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
kl_beta (Float)
{'default': 0.01, 'conditions': [], 'min_value': 0.001, 'max_value': 10.0, 'step': None, 'sampling': 'log'}


In [None]:
# Run Tuner
# Runtime on MacBook Pro 2017: 01h 15m
tuner.search(
    bb_train_data, bb_train_data,
    validation_data = (bb_test_data, bb_test_data),
    callbacks = []
)

Trial 67 Complete [00h 00m 24s]
mse: 4.7634968757629395

Best mse So Far: 0.04789607971906662
Total elapsed time: 00h 23m 58s

Search: Running Trial #68

Hyperparameter    |Value             |Best Value So Far 
encoder_units     |130               |60                
decoder_units     |140               |140               
learning_rate     |0.0096456         |0.0023747         
kl_beta           |6.0328            |0.023363          
tuner/epochs      |2                 |2                 
tuner/initial_e...|0                 |0                 
tuner/bracket     |7                 |7                 
tuner/round       |0                 |0                 

Train on 83893 samples, validate on 14805 samples
Epoch 1/2
   32/83893 [..............................] - ETA: 3:51 - loss: 304.9012 - mse: 1.2448

2022-08-07 04:52:14.803641: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set




## Print out best tuned parameters

In [None]:
tuner.results_summary(num_trials = 3)

In [None]:
tuner.get_best_models()[0].summary()

In [None]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

In [None]:
print(best_hps.get('encoder_units'))
print(best_hps.get('decoder_units'))
print(best_hps.get('learning_rate'))
print(best_hps.get('kl_beta'))

with open("optimal_parameters.csv", 'w') as f:
    f.write("param,value,integer\n")
    f.write("encoder_units,")
    f.write(str(best_hps.get('encoder_units')))
    f.write(",1\n")
    f.write("decoder_units,")
    f.write(str(best_hps.get('decoder_units')))
    f.write(",1\n")
    f.write("learning_rate,")
    f.write(str(best_hps.get('learning_rate')))
    f.write(",0\n")
    f.write("kl_beta,")
    f.write(str(best_hps.get('kl_beta')))
    f.write(",0")