In [1]:
import sys

sys.path.append("..")

In [2]:
from pae.loaders.LHCO import ScalarLoaderLHCO, DatasetBuilder

In [3]:
x = ScalarLoaderLHCO.from_json("../pae/configs/loader/rnd_scalar_2j.json")
mjj = ScalarLoaderLHCO.from_json("../pae/configs/loader/rnd_scalar_mjj.json")

In [4]:
builder = DatasetBuilder(x, mjj)

In [5]:
builder.data_preparation(sample_sizes ={'sig':1000, 'bkg': 1_000_000}, fit_key='bkg')



In [6]:
spec = {'train':{'bkg':1_000_000}, 'test':{'sig':100, 'bkg': 1000}}

In [7]:
dataset = builder.make_dataset(train = {'bkg':1_000_000}, test={'sig':100, 'bkg': 10_000}, replace=True)

In [8]:
from pae.density import GMM, ConvKDE
import numpy as np


fftkde = ConvKDE()
fftkde.fit(dataset["mjj_train"])#, range=(1000, 9500)) 
y_kde = fftkde.evaluate(dataset["mjj_train"])

In [9]:
x_ref = np.linspace(1600, 8000, 1701)

y_kde = fftkde.evaluate(x_ref)

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

from plotly.offline import download_plotlyjs, init_notebook_mode
init_notebook_mode(connected = True)
pio.templates.default = "plotly_dark"

In [10]:
from pae.models.autoencoder import DenseAutoencoder
from pae.models.flows import MAF
from pae.models.nn import PaeBuilder
import tensorflow as tf
import tensorflow.keras as tfk

ae_config = {
    'input_dim':47, 
    'encoding_dim':10, 
    'units':[30, 20, 15],
    'weight_reg':tfk.regularizers.L1L2(l1=1e-5, l2=1e-4),
    'output_activation':tf.nn.sigmoid
}
nf_config = {
    'n_dims':10, 
    'n_layers':5, 
    'units':[32 for _ in range(4)]
}
optimizer_ae = {
    'learning_rate': 0.001
}
optimizer_nf = {
    'learning_rate': 0.005
}

builder = PaeBuilder()
builder.make_ae_model(DenseAutoencoder, ae_config)
builder.make_ae_optimizer(tfk.optimizers.Adam, optimizer_ae)
builder.make_nf_model(MAF, nf_config)
builder.make_nf_optimizer(tfk.optimizers.Adam, optimizer_nf)
builder.compile_ae()
builder.compile_nf()
pae = builder.pae
pae.ae(np.zeros(47).reshape(1,-1))
pae.nf(np.zeros(10).reshape(1,-1))
pae.ae.load_weights("./logs/full-cpu-kde-20211020-165124/ae.h5")
pae.nf.load_weights("./logs/full-cpu-kde-20211020-165124/nf.h5")

2021-10-21 18:01:37.675307: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-21 18:01:39.085851: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30997 MB memory:  -> device: 0, name: NVIDIA Tesla V100S-PCIE-32GB, pci bus id: 0000:3b:00.0, compute capability: 7.0
2021-10-21 18:01:39.087514: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 30997 MB memory:  -> device: 1, name: NVIDIA Tesla V100S-PCIE-32GB, pci bus id: 0000:d8:00.0, compute capability: 7.0


In [11]:
from sklearn.model_selection import KFold

fold5 = KFold(8, shuffle=True)
q= fold5.split(dataset["x_train"])
x_train, x_valid = next(q)
print(x_train.shape)
print(x_valid.shape)

(875000,)
(125000,)


In [15]:
import tensorflow_probability as tfp
import tqdm
import os
from concurrent.futures import ProcessPoolExecutor 
tfd = tfp.distributions
pae.compute_implicit_sigma(dataset['x_train'][x_valid])
from datetime import datetime
STEPS = 500
BATCH_SIZE = 200

sigma = tf.constant(tf.sqrt(pae.sigma_square))
z_ = tf.Variable(pae.ae.encoder(dataset['x_test']))
opt = tf.optimizers.Adam(learning_rate=0.001)

timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
map_summary_writer = tf.summary.create_file_writer(f"./testing/{timestamp}/map")
print(z_.shape)

@tf.function
def max_apriori_prob(x, z, sigma, pae):
    distrs = tfd.MultivariateNormalDiag(loc=x, scale_diag=sigma)
    nf_ll = pae.nf(z)
    reco = pae.ae.decoder(z)
    gauss_ll = distrs.log_prob(reco)
    #tf.print("gauss:", gaussll, "nf:", nfll, "\n")
    return  tf.reduce_mean(-nf_ll - gauss_ll) 


@tf.function
def find_map(x_):
    global z_
    if z_ is None:
        z_ = tf.Variable(pae.ae.encoder(x_))
    z_.assign(pae.ae.encoder(x_))
    for i in range(STEPS):
        with tf.GradientTape() as tape:
            tape.watch(z_)
            nll = max_apriori_prob(x_, z_, sigma, pae)
        grad = tape.gradient(nll, [z_])
        opt.apply_gradients(zip(grad, [z_]))
        with map_summary_writer.as_default():
            tf.summary.scalar('nll', nll, step=i)
    return z_

@tf.function
def tf_graph_map(*args, parallel_iterations=1000):
    return tf.map_fn(*args, parallel_iterations=parallel_iterations)

(10100, 10)


In [16]:
ds = tf.convert_to_tensor(dataset['x_test'], dtype=tf.float32)

#ds.shape
# ds = tf.data.Dataset.from_tensor_slices(dataset['x_test'].astype(np.float32))
# ds = ds.cache()
#ds = ds.batch(BATCH_SIZE)
#ds = ds.prefetch()
#print(ds)

In [17]:
%%time
tf.profiler.experimental.start(f"./testing/{timestamp}")
with tf.device("GPU:0"):
    # ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    # for i, x in enumerate(ds):
    #     ta.write(i, find_map(x))
    # z_map = ta.concat()
    # ta.close()
    z_map = find_map(ds)
tf.profiler.experimental.stop()
#     z_map = tf_graph_map(find_map, x_test, parallel_iterations=1000)

2021-10-21 18:12:13.295761: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing.
2021-10-21 18:12:13.295940: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
2021-10-21 18:19:46.839539: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-10-21 18:19:46.918427: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1748] CUPTI activity buffer flushed
2021-10-21 18:19:51.402454: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:673]  GpuTracer has collected 62017 callback api events and 62016 activity events. 
2021-10-21 18:19:58.678041: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
2021-10-21 18:20:07.941078: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./testing/20211021-181206/plugins/profile/2021_10_21_18_19_58

2021-10-21 18:20:13.999695: I tensorflow/core/profiler/rpc/client/save_profile.cc:142

CPU times: user 10min 49s, sys: 55.9 s, total: 11min 45s
Wall time: 8min 13s


In [19]:
print(z_map.shape)

(10100, 10)


In [20]:
print(z_map[0])

tf.Tensor(
[0.51437443 0.71470994 0.39675683 0.12056316 0.5238756  0.2279173
 0.8093918  0.7783619  0.05651172 0.8553338 ], shape=(10,), dtype=float32)
