In [None]:
import numpy as np
import h5py
import setGPU

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda, BatchNormalization, Activation, Concatenate, Dropout, Layer
from tensorflow.keras.layers import ReLU, LeakyReLU
from tensorflow.keras import backend as K
import math

from datetime import datetime
from tensorboard import program
import os

from functions import preprocess_anomaly_data, custom_loss_negative, custom_loss_training,\
roc_objective,load_model, save_model
from custom_layers import Sampling
from autoencoder_classes import VAE

import pickle

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# Build a TF1-compat session config with the `allow_growth` GPU option enabled
# (TensorFlow documents this option as growing GPU memory usage as needed).
config = ConfigProto()
config.gpu_options.allow_growth = True
# Create the session from that config and keep a reference to it in `session`.
session = InteractiveSession(config=config)

In [None]:
# Data = (N,19,3,1).flatten()
# Unpack the seven objects stored in the pickle: flattened/scaled train and test arrays,
# the BSM inputs and targets (indexed per signal further down), and the `pt_scaler` object.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
# NOTE(review): hard-coded absolute EOS path; consider moving it to a configurable constant.
with open('/eos/user/e/epuljak/forDelphes/Delphes_QCD_BSM_data.pkl', 'rb') as f:
    X_train_flatten, X_train_scaled, X_test_flatten, X_test_scaled, bsm_data, bsm_target, pt_scaler = pickle.load(f)

In [None]:
# Width of the latent space: used for the mu / logvar Dense layers and the decoder input.
latent_dim = 3
# Number of features per event fed to the encoder and produced by the decoder
# (57 = 19*3*1, matching the "(N,19,3,1).flatten()" note on the data-loading cell).
input_shape = 57

In [None]:
#encoder
# `shape` has to be a tuple: `(input_shape)` is just the int 57, which only works on Keras
# versions that coerce a bare int into a shape. Spell it as a 1-tuple, like the decoder input.
inputArray = Input(shape=(input_shape,))
x = BatchNormalization()(inputArray)
# Each Dense layer gets its own HeUniform initializer with a fixed seed for reproducibility.
x = Dense(32, kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)
x = BatchNormalization()(x)
x = LeakyReLU(alpha=0.3)(x)
x = Dense(16, kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)
x = BatchNormalization()(x)
x = LeakyReLU(alpha=0.3)(x)
# Two linear heads on the same hidden features: latent mean and latent log-variance.
mu = Dense(latent_dim, name = 'latent_mu', kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)
logvar = Dense(latent_dim, name = 'latent_logvar', kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)

# Use reparameterization trick to ensure correct gradient
z = Sampling()([mu, logvar])

# Create encoder: one input, three outputs (mu, logvar, sampled z)
encoder = Model(inputArray, [mu, logvar, z], name='encoder')
encoder.summary()

#decoder
# Mirror of the encoder's hidden stack (16 -> 32), ending in a linear layer of width input_shape.
d_input = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(16, kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(d_input)
x = BatchNormalization()(x)
x = LeakyReLU(alpha=0.3)(x)
x = Dense(32, kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)
x = BatchNormalization()(x)
x = LeakyReLU(alpha=0.3)(x)
dec = Dense(input_shape, kernel_initializer=tf.keras.initializers.HeUniform(seed=42))(x)

# Create decoder
decoder = Model(d_input, dec, name='decoder')
decoder.summary()


In [None]:
# Combine the encoder and decoder built above in the project's VAE class
# (imported from autoencoder_classes).
vae = VAE(encoder, decoder)
# Only an optimizer (Adam with default settings) is passed to compile.
# NOTE(review): no `loss=` is given — presumably VAE computes its loss internally; confirm.
vae.compile(optimizer=keras.optimizers.Adam())

In [None]:
#logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
#tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, update_freq='batch', write_images=True, profile_batch=0, histogram_freq=0)
#tensorboard_callback.set_model(vae)

# tensorboard = False
# if tensorboard:
#     tracking_address = os.path.join(os.getcwd(), "tracking_dir")
#     tb = program.TensorBoard()
#     tb.configure(argv=[None, '--logdir', tracking_address])
#     url = tb.launch()

#     if not os.path.exists(tracking_address):
#         os.makedirs(tracking_address)

#     now = datetime.now().strftime('%b%d_%H-%M-%S')

#     model_name = 'VAE_SGD'

#     exp_dir = os.path.join(tracking_address, model_name + '_' + str(now))
#     if not os.path.exists(exp_dir):
#         os.makedirs(exp_dir)

#     tb_dir = os.path.join(exp_dir, 'tb_logs')
#     if not os.path.exists(tb_dir):
#         os.makedirs(tb_dir)

#     # By default shows losses and metrics for both training and validation
#     tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
#                                               profile_batch=0,
#                                               histogram_freq=1)  # if 1 shows weights histograms
#     callbacks.append(tb_callback)
#     #%load_ext tensorboard
#     #%tensorboard --logdir /tracking_dir

## Train

In [None]:
# Training hyper-parameters.
EPOCHS = 150  # maximum number of training epochs
BATCH_SIZE = 1024  # samples per batch
VALIDATION_SPLIT = 0.3  # fraction of the training data to hold out for validation
#METRICS = [kl_loss, reco_loss]

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
# Training callbacks, listed in the order they are handed to Keras.
callbacks = [
    # Multiply the learning rate by 0.1 after 2 epochs without val_loss improvement (floor 1e-6).
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, min_lr=1E-6),
    # Stop immediately if the loss becomes NaN.
    TerminateOnNaN(),
    # NeptuneMonitor() is deliberately left disabled.
    # Stop after 10 epochs without val_loss improvement and restore the best weights.
    EarlyStopping(monitor='val_loss', verbose=1, patience=10, restore_best_weights=True),
]

In [None]:
print("TRAINING")
# Fit on the flattened inputs with the scaled arrays as targets.
# The validation fraction comes from VALIDATION_SPLIT (defined with the other training
# constants) instead of a duplicated literal, so changing the constant actually takes effect.
history = vae.fit(
    X_train_flatten,
    X_train_scaled,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    callbacks=callbacks,
)


In [None]:
# NOTE(review): neither `pathlib` nor `os` is used in this cell; `os` is already imported
# in the first cell.
import pathlib
import os
# Save the trained VAE under this name (a relative path, so it lands in the working directory).
vae.save('Vae_Delphes_seedDense_beta70')

In [None]:
#model_enc, model_dec = VAE.load('Vae_Delphes_meanKL_SGD', custom_objects={'Sampling': Sampling})

## Plot training/validation loss

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Total loss per epoch for the training and validation sets.
plt.figure()
# The first recorded epoch is dropped from both curves ([1:]).
for hist_key, curve_label in (('loss', 'Training loss'), ('val_loss', 'Validation loss')):
    plt.plot(history.history[hist_key][1:], label=curve_label)
plt.title('Training and validation loss - MAE')
# Log-scaled y axis; non-positive values are clipped rather than raising.
plt.yscale('log', nonpositive='clip')
plt.legend(loc='best')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

In [None]:
# KL-divergence term per epoch for the training and validation sets.
plt.figure()
kl_curves = {'kl_loss': 'Training loss', 'val_kl_loss': 'Validation loss'}
# The first recorded epoch is dropped from both curves ([1:]).
for hist_key, curve_label in kl_curves.items():
    plt.plot(history.history[hist_key][1:], label=curve_label)
plt.title('Training and validation KL loss')
# Log-scaled y axis; non-positive values are clipped rather than raising.
plt.yscale('log', nonpositive='clip')
plt.legend(loc='best')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

In [None]:
# save_model('VAE_models/final_models/withCorrectPrefiltering/VAE_encoder_notpruned', vae.encoder)
# save_model('VAE_models/final_models/withCorrectPrefiltering/VAE_decoder_notpruned', vae.decoder)

# encoder = load_model('VAE_models/final_models/withCorrectPrefiltering/VAE_encoder_notpruned', custom_objects={'Sampling': Sampling})
# decoder = load_model('VAE_models/final_models/withCorrectPrefiltering/VAE_decoder_notpruned', custom_objects={'Sampling': Sampling})

In [None]:
# Names of the BSM signal samples. The position in this list is used as the index into
# `bsm_data` / `bsm_target` below, so the order must match the order in the loaded data.
bsm_labels = ['Leptoquark','A to 4 leptons', 'hChToTauNu', 'hToTauTau']

In [None]:
# One entry per BSM sample: [label, target, reconstruction, latent mean, latent logvar, latent z].
bsm_results = []

for i, label in enumerate(bsm_labels):
    # Encode the i-th BSM sample, then decode its sampled latent vector.
    latent_mean, latent_logvar, latent_z = encoder(bsm_data[i])
    reconstruction = decoder(latent_z)
    print(reconstruction.shape)
    bsm_results.append(
        [label, bsm_target[i], reconstruction, latent_mean, latent_logvar, latent_z]
    )

In [None]:
# Name of the HDF5 file that receives the QCD and BSM inputs, targets and model outputs
# (a relative path, opened in write mode by the export cell).
output_result = 'VAE_result_notpruned_alldata.h5'

In [None]:
# The QCD model outputs are not computed anywhere earlier in the notebook, so a fresh
# kernel would hit a NameError here. Derive them from the QCD test inputs, the same way
# the BSM samples are processed: encode, then decode the sampled latent vector.
qcd_mean, qcd_logvar, qcd_z = encoder(X_test_flatten)
qcd_prediction = decoder(qcd_z)

# The context manager closes the file even if one of the writes raises.
with h5py.File(output_result, 'w') as h5f:
    # QCD test sample: scaled target, raw input, reconstruction and latent quantities.
    h5f.create_dataset('QCD', data=X_test_scaled)
    h5f.create_dataset('QCD_input', data=X_test_flatten)
    h5f.create_dataset('predicted_QCD', data=qcd_prediction)
    h5f.create_dataset('encoded_mean_QCD', data=qcd_mean)
    h5f.create_dataset('encoded_logvar_QCD', data=qcd_logvar)
    h5f.create_dataset('encoded_z_QCD', data=qcd_z)
    # Each bsm entry is [label, target, prediction, mean, logvar, z]; the label names the datasets.
    for i, bsm in enumerate(bsm_results):
        h5f.create_dataset('%s_scaled' % bsm[0], data=bsm[1])
        h5f.create_dataset('%s_input' % bsm[0], data=bsm_data[i])
        h5f.create_dataset('predicted_%s' % bsm[0], data=bsm[2])
        h5f.create_dataset('encoded_mean_%s' % bsm[0], data=bsm[3])
        h5f.create_dataset('encoded_logvar_%s' % bsm[0], data=bsm[4])
        h5f.create_dataset('encoded_z_%s' % bsm[0], data=bsm[5])