In [1]:
import os

# Keep the wildcard import ahead of numpy/pandas: the explicit `np` / `pd`
# bindings below then take precedence over any same-named exports of `models`.
from models import *

import numpy as np
import pandas as pd

# GPU setup. `tf` is not imported explicitly in this cell; it presumably
# arrives through the wildcard import from `models` -- TODO confirm.
physical_devices = tf.config.list_physical_devices('GPU')
print("GPUs Available: ", physical_devices, flush=True)

# Turn on memory growth for every GPU that TensorFlow reports.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Hide the first listed GPU; only the remaining ones (if any) stay visible.
# NOTE(review): on a single-GPU machine this leaves no GPU visible -- confirm
# that skipping device 0 is intended.
tf.config.set_visible_devices(physical_devices[1:], 'GPU')
# Debug aid: print("GPUs Available: ", tf.config.list_logical_devices('GPU'), flush=True)

2022-08-07 19:54:47.694403: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


GPUs Available:  []


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
2022-08-07 19:54:49.579449: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-07 19:54:49.580585: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-07 19:54:49.595274: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2022-08-07 19:54:49.595304: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 8557ee92613c
2022-08-07 19:54:49.595312: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 8557ee92613c
2022-08-07 19:54:49.595432: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.39.1
2022-08-07 19:54:49.595453: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported vers

In [2]:
# Directories for model files and for the reconstruction outputs
model_path = 'models/'
recon_path = 'results/reconstructions/'

# VAE model files (full model, encoder, decoder)
path_vae = f'{model_path}VAE.h5'
path_encoder = f'{model_path}VAE_encoder.h5'
path_decoder = f'{model_path}VAE_decoder.h5'

# Input workbook: the "_NA" variant is selected when USE_NA is switched on
USE_NA = False
twins_path = "data/BioBank_NA.xlsx" if USE_NA else 'data/BioBank.xlsx'

In [3]:
# Load datasets
# Both sheets are requested in a single read_excel call so the workbook is
# opened and parsed once; a list-valued `sheet_name` returns a dict of
# DataFrames keyed by sheet name.
twins_sheets = pd.read_excel(twins_path,
                             sheet_name=['Training Set', 'Testing Set'],
                             engine='openpyxl')
twins_train_df = twins_sheets['Training Set']
twins_test_df = twins_sheets['Testing Set']

# Training rows followed by testing rows, re-indexed from 0.
twins_full_data = pd.concat([twins_train_df, twins_test_df], ignore_index=True)

In [4]:
# Data & model configuration
# The model's input width is the number of columns in the training sheet.
input_dim = twins_train_df.shape[1]

# Note: Figures 2a and 2b (correlation-matrix MSE curve for varying latent
# dimensionalities d) are reproduced by running the code below in a loop with
#latent_dims = [5, 10, 15, 18, 20, 30, 40, 60, 80, 100, 120, 160, 200]

params = load_vae_parameters(optimal=False)

# A single hidden width is used: the larger of the encoder/decoder settings.
intermediate_dim = max(params['encoder_units'], params['decoder_units'])
latent_dim = 18

kl_beta = params['kl_beta']
learning_rate = params['learning_rate']

# Not referenced by the cells visible here (weights are loaded, not trained).
batch_size, n_epochs = 256, 1000

# Instantiate model
mtmodel = mtVAE(input_dim, intermediate_dim, latent_dim, kl_beta, learning_rate)

# Load the stored weights into the full VAE, then the encoder, then the decoder
for network, weights_file in ((mtmodel.vae, path_vae),
                              (mtmodel.encoder, path_encoder),
                              (mtmodel.decoder, path_decoder)):
    network.load_weights(weights_file)

# The PCA / KPCA baselines are only built for the dataset without NAs.
if not USE_NA:
    # PCA baseline with the same latent dimensionality as the VAE
    PCA_model_ = PCA_model(twins_train_df.values, latent_dim)

    # Kernel PCA baselines, one per kernel.
    # NOTE(review): the four numbers after the kernel name are passed
    # positionally; presumably degree, gamma, coef0, alpha -- TODO confirm
    # against KPCA_model's signature.
    poly_KPCA_model_ = KPCA_model(
        twins_train_df.values, latent_dim, "poly", 2, 0.001, 3, 5.0)
    cosine_KPCA_model_ = KPCA_model(
        twins_train_df.values, latent_dim, "cosine", 1, 0, 0, 0)
    sigmoid_KPCA_model_ = KPCA_model(
        twins_train_df.values, latent_dim, "sigmoid", 1, 0.05, 0, 0)
    rbf_KPCA_model_ = KPCA_model(
        twins_train_df.values, latent_dim, "rbf", 1, 0.005, 0, 0)

2022-08-07 19:58:47.640642: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-07 19:58:47.652786: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-07 19:58:47.673729: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:196] None of the MLIR optimization passes are enabled (registered 0 passes)
2022-08-07 19:58:47.676975: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2749780000 Hz
  self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)


## Create and save data reconstructions

In [5]:
# Create save paths for reconstructed data
# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before any reconstruction is written.
os.makedirs(recon_path, exist_ok=True)

# VAE reconstructions are written for both dataset variants.
train_vae_recon = f'{recon_path}BB_train_VAE_reconstruction_d_{latent_dim}.csv'
test_vae_recon = f'{recon_path}BB_test_VAE_reconstruction_d_{latent_dim}.csv'

# PCA / KPCA paths are only defined when the dataset without NAs is used.
if not USE_NA:
    train_pca_recon = f'{recon_path}BB_train_PCA_reconstruction_d_{latent_dim}.csv'
    train_cosine_kpca_recon = f'{recon_path}BB_train_cosine_KPCA_reconstruction_d_{latent_dim}.csv'
    train_sigmoid_kpca_recon = f'{recon_path}BB_train_sigmoid_KPCA_reconstruction_d_{latent_dim}.csv'
    train_rbf_kpca_recon = f'{recon_path}BB_train_rbf_KPCA_reconstruction_d_{latent_dim}.csv'
    train_poly_kpca_recon = f'{recon_path}BB_train_poly_KPCA_reconstruction_d_{latent_dim}.csv'

    test_pca_recon = f'{recon_path}BB_test_PCA_reconstruction_d_{latent_dim}.csv'
    test_cosine_kpca_recon = f'{recon_path}BB_test_cosine_KPCA_reconstruction_d_{latent_dim}.csv'
    test_sigmoid_kpca_recon = f'{recon_path}BB_test_sigmoid_KPCA_reconstruction_d_{latent_dim}.csv'
    test_rbf_kpca_recon = f'{recon_path}BB_test_rbf_KPCA_reconstruction_d_{latent_dim}.csv'
    test_poly_kpca_recon = f'{recon_path}BB_test_poly_KPCA_reconstruction_d_{latent_dim}.csv'

In [6]:
# Reconstruct the training and testing sets with the VAE and write each
# result to its CSV, keeping the original column names.
for source_df, csv_path in ((twins_train_df, train_vae_recon),
                            (twins_test_df, test_vae_recon)):
    vae_rcon = pd.DataFrame(mtmodel.reconstruct(source_df.values),
                            columns=source_df.columns)
    vae_rcon.to_csv(csv_path, index=False)
    # Drop the frame before the next split is reconstructed.
    del vae_rcon



In [7]:
# PCA reconstructions of the training and testing sets.
# PCA_model_ and the *_pca_recon paths are only defined under `not USE_NA`,
# so this cell must apply the same guard to stay runnable when USE_NA is True.
if not USE_NA:
    for source_df, csv_path in ((twins_train_df, train_pca_recon),
                                (twins_test_df, test_pca_recon)):
        pca_rcon = pd.DataFrame(PCA_model_.reconstruct(source_df.values),
                                columns=source_df.columns)
        pca_rcon.to_csv(csv_path, index=False)
        # Drop the frame before the next split is reconstructed.
        del pca_rcon

In [8]:
# Cosine-kernel KPCA reconstructions of the training and testing sets.
# cosine_KPCA_model_ and the *_cosine_kpca_recon paths are only defined under
# `not USE_NA`, so this cell applies the same guard.
if not USE_NA:
    for source_df, csv_path in ((twins_train_df, train_cosine_kpca_recon),
                                (twins_test_df, test_cosine_kpca_recon)):
        kpca_rcon = pd.DataFrame(cosine_KPCA_model_.reconstruct(source_df.values),
                                 columns=source_df.columns)
        kpca_rcon.to_csv(csv_path, index=False)
        # Drop the frame before the next split is reconstructed.
        del kpca_rcon

In [9]:
# Sigmoid-kernel KPCA reconstructions of the training and testing sets.
# sigmoid_KPCA_model_ and the *_sigmoid_kpca_recon paths are only defined under
# `not USE_NA`, so this cell applies the same guard.
if not USE_NA:
    for source_df, csv_path in ((twins_train_df, train_sigmoid_kpca_recon),
                                (twins_test_df, test_sigmoid_kpca_recon)):
        kpca_rcon = pd.DataFrame(sigmoid_KPCA_model_.reconstruct(source_df.values),
                                 columns=source_df.columns)
        kpca_rcon.to_csv(csv_path, index=False)
        # Drop the frame before the next split is reconstructed.
        del kpca_rcon

In [10]:
# RBF-kernel KPCA reconstructions of the training and testing sets.
# rbf_KPCA_model_ and the *_rbf_kpca_recon paths are only defined under
# `not USE_NA`, so this cell applies the same guard.
if not USE_NA:
    for source_df, csv_path in ((twins_train_df, train_rbf_kpca_recon),
                                (twins_test_df, test_rbf_kpca_recon)):
        kpca_rcon = pd.DataFrame(rbf_KPCA_model_.reconstruct(source_df.values),
                                 columns=source_df.columns)
        kpca_rcon.to_csv(csv_path, index=False)
        # Drop the frame before the next split is reconstructed.
        del kpca_rcon

In [11]:
# Polynomial-kernel KPCA reconstructions of the training and testing sets.
# poly_KPCA_model_ and the *_poly_kpca_recon paths are only defined under
# `not USE_NA`, so this cell applies the same guard.
if not USE_NA:
    for source_df, csv_path in ((twins_train_df, train_poly_kpca_recon),
                                (twins_test_df, test_poly_kpca_recon)):
        kpca_rcon = pd.DataFrame(poly_KPCA_model_.reconstruct(source_df.values),
                                 columns=source_df.columns)
        kpca_rcon.to_csv(csv_path, index=False)
        # Drop the frame before the next split is reconstructed.
        del kpca_rcon