# VAE sampler (generate sample from VAE Model)

In [1]:
%cd ..

/global/home/users/yifengh3/VAE/EMD_VAE


In [2]:
import tensorflow as tf
tf.config.experimental.set_visible_devices([], 'GPU')
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

import os
import os.path as osp
import sys

import numpy as np

import matplotlib
import matplotlib.pyplot as plt

import tensorflow.keras as keras
import tensorflow.keras.backend as K

%load_ext autoreload
%autoreload 2

# from utils.tf_sinkhorn import ground_distance_tf_nograd, sinkhorn_knopp_tf_scaling_stabilized_class
import utils.VAE_model_tools_leakyrelu
from utils.VAE_model_tools_leakyrelu import build_and_compile_annealing_vae, betaVAEModel, reset_metrics

import pandas
import matplotlib.pyplot as plt

import h5py
import pickle
from scipy.stats import gaussian_kde

1 Physical GPUs, 0 Logical GPUs
INFO:tensorflow:Using MirroredStrategy with devices ('/device:CPU:0',)
INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


In [3]:
# Now load some data
# path to file
fn =  '/global/home/users/yifengh3/VAE/data/B_background.h5'

df = pandas.read_hdf(fn,stop=1000000)
print(df.shape)
print("Memory in GB:",sum(df.memory_usage(deep=True)) / (1024**3)+sum(df.memory_usage(deep=True)) / (1024**3))

# Data file contains, for each event, 50 particles (with zero padding), each particle with pT, eta, phi
data = df.values.reshape((-1,50,3))

# Normalize pTs so that HT = 1
HT = np.sum(data[:,:,0],axis=-1)
data[:,:,0] = data[:,:,0]/HT[:,None]

# Inputs x to NN will be: pT, eta, cos(phi), sin(phi), log E
# Separated phi into cos and sin for continuity around full detector, so make things easier for NN.
# Also adding the log E is mainly because it seems like it should make things easier for NN, since there is an exponential spread in particle energies.
# Feel free to change these choices as desired. E.g. px, py might be equally as good as pt, sin, cos.
sig_input = np.zeros((len(data),50,4))
sig_input[:,:,:2] = data[:,:,:2]
sig_input[:,:,2] = np.cos(data[:,:,2])
sig_input[:,:,3] = np.sin(data[:,:,2])
# no input from energy for B jets
# sig_input[:,:,4] = np.log(data[:,:,3]+1e-8)


data_x = sig_input
# Event 'labels' y are [pT, eta, phi], which is used to calculate EMD to output which is also pT, eta, phi.
data_y = data


train_x = data_x[:800000]
train_y = data_y[:800000]
valid_x = data_x[800000:]
valid_y = data_y[800000:]

(1000000, 150)
Memory in GB: 2.250075340270996


In [4]:
# load and build the model
model_prefix = "/global/home/users/yifengh3/VAE/B_results/method2_beta1"

import json
vae_args_file = osp.join(model_prefix,"vae_args.dat")
with open(vae_args_file,'r') as f:
  vae_arg_dict = json.loads(f.read())

print("\n\n vae_arg_dict:", vae_arg_dict)

vae, encoder, decoder = build_and_compile_annealing_vae(**vae_arg_dict)

batch_size=150
save_period=2

vae.beta.assign(0.001)

K.set_value(vae.optimizer.lr,1e-4)



 vae_arg_dict: {'encoder_conv_layers': [1024, 1024, 1024, 1024], 'dense_size': [1024, 1024, 1024, 1024], 'decoder_sizes': [1024, 1024, 1024, 1024, 1024], 'numItermaxinner': 20, 'numIter': 10, 'reg_init': 1.0, 'reg_final': 0.01, 'stopThr': 0.001, 'num_inputs': 4, 'num_particles_in': 50}
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
Model: "VAE"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputs (InputLayer)             [(None, 50, 4)]      0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 50, 1024)     5120        inputs[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu (Leak

In [45]:
def generate_outjets(weight_file, validation_data_input, number_of_sampling=3,):
    """Generate outjets from VAE model
        INPUT:
        weight file: weight file location for VAE model
        validation_data_input: validation data input, stored as numpy array
        number_of_sampling: number of sampling, default to 3
        OUTPUT:
        outs_jet: numpy array of outjets, shape: (number_of_sampling x data_inputsize)
    """
    vae.load_weights(weight_file)
    outs_jet = np.stack([vae.predict(validation_data_input)[0] for j in range(number_of_sampling)])
    return outs_jet

def save_as_hdf5(original_jets,sampled_jets, file_prefix=".", savename = "jets.h5"):
    """Save data as hdf5 file
        original_jets: original input jets
        sampled_jets: sampled jets from VAE model
        file_prefix: output file prefix
        savename: output file name
    """
    hfile = h5py.File(osp.join(file_prefix,savename), 'w')
    hfile.create_dataset('original_jets', data=original_jets)
    hfile.create_dataset('sampled_jets', data=sampled_jets)
    hfile.close()

def generate_samples(weight_file, valid_input, originial_jet, number_of_sampling=3,
                     start_index=0, stop_index=1000, outfile_prefix=".", outfile_name="jets.h5"):
    
    jets = generate_outjets(weight_file, valid_x[start_index:stop_index], number_of_sampling=number_of_sampling)
    save_as_hdf5(originial_jet[start_index:stop_index],jets, 
                 file_prefix=outfile_prefix, savename = outfile_name)
    return jets

In [48]:
weight_file = osp.join(model_prefix,"checkpoint/model_weights_400.hdf5")
samples = generate_samples(weight_file, valid_x, valid_y, stop_index=2000, 
                 outfile_prefix="/global/home/users/yifengh3/VAE/B_results/method2_beta1",outfile_name="sampled_jets.h5" )