In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from models.autoencoder import DenseAutoencoder
from models.flows import MAF
from models.nn import PaeBuilder

import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras as tfk
print(tf.__version__)

In [None]:
# List the devices TensorFlow can see, using the public configuration API.
# The previous `tensorflow.python.client.device_lib.list_local_devices()` call
# lives in a private module and initialises the devices as a side effect.
print(tf.config.list_physical_devices())


In [None]:
from loaders.LHCO import LhcoRnDLoader
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

# Scaler instance handed to the loader below.
scaler = QuantileTransformer(output_distribution='uniform')
#scaler = MinMaxScaler()

# HDF5 inputs, keyed by the sample name used in the fraction dicts.
files = dict(
    bkg='../data/MC_bkgHLF_merged.h5',
    bbox='../data/BBOX1_bkgHLF_merged.h5',
    bbox2='../data/BBOX2_bkgHLF_merged.h5',
)

# Sample composition of the train/validation and the test sets.
train_fractions = dict(bkg=1)
test_fractions = dict(bbox2=1)

loader = LhcoRnDLoader(files, 'all', scaler)
loader.preprocessing('bkg')
# NOTE(review): 1_000_000 is presumably the number of events to draw — confirm
# against LhcoRnDLoader.
train = loader.make_train_val(1_000_000, train_fractions, val_split=0.2)
test = loader.make_test(1_000_000, test_fractions, replace=False)

In [None]:
from utils.plotting import feature_plots

# Plot the input features of the training split produced by the loader.
feature_plots(train['x_train'])



In [None]:
# Plot the input features of the test split produced by the loader.
feature_plots(test['x_test'])


### Reweighting

In [None]:
from sklearn.mixture import GaussianMixture
GMM = GaussianMixture

# Model the 1-D mjj distribution of the training split with a Gaussian mixture.
# random_state pins both the fit initialisation and the draws made by
# gmm.sample(), so the fit (and every weight derived from it later) is
# reproducible after a kernel restart.
gmm = GMM(n_components=200, covariance_type='full', random_state=0).fit(
    train["mjj_train"].reshape(-1, 1)
)

# Overlay the true mjj histogram with an equally sized sample drawn from the
# GMM; the second histogram reuses the bin edges of the first.
plt.figure(figsize=(12,8))
_, b, _ = plt.hist(train["mjj_train"], bins=50, label='mjj true', alpha=.5, density=True)
sample = gmm.sample(train["mjj_train"].shape[0])
plt.hist(sample[0], bins=b, label='mjj GMM', alpha=.5, density=True)
plt.xlabel('mjj')
plt.ylabel('density')
plt.legend()
plt.show()

In [None]:
# score_samples returns the per-event log-density under the fitted GMM, so
# `weights2` / `weights2_valid` hold log p(mjj); the event weights used for
# training are the inverse density 1/exp(log p).
weights2 = gmm.score_samples(train["mjj_train"].reshape(-1,1))
weights2_valid = gmm.score_samples(train["mjj_valid"].reshape(-1,1))

# Histogram of mjj with the inverse-density weights applied, i.e. the same
# weights that are later passed as `sample_weight` to the autoencoder fit.
# (Weighting by exp(log p) instead would multiply by the density rather than
# divide by it.)
plt.figure(figsize=(12,8))
_, b, _ = plt.hist(train["mjj_train"], bins=50, label='mjj weighted', alpha=.5, weights=1/np.exp(weights2))
plt.legend()
plt.show()

In [None]:
# Inverse GMM density of every training event, drawn against its mjj value.
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(train["mjj_train"], 1/np.exp(weights2))
plt.show()

In [None]:
# NOTE(review): tensorflow, tensorflow_probability, tensorflow.keras and numpy
# are already imported in the first cell of this notebook; the repeats here are
# harmless but redundant.
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras as tfk
import numpy as np
import sys

# Short aliases for the TensorFlow Probability / Keras sub-modules.
tfd = tfp.distributions
tfb = tfp.bijectors
tfkl = tfk.layers

# Make the parent directory importable.
# NOTE(review): the first cell already imports from `models` before this path
# is appended — on a fresh kernel this line may need to run earlier; confirm.
sys.path.append('../')

from models.autoencoder import DenseAutoencoder
from models.flows import MAF

In [None]:

builder = PaeBuilder()

# Autoencoder settings, forwarded to DenseAutoencoder by the builder.
# NOTE(review): input_dim=47 presumably equals the number of input features —
# confirm against the loader output.
ae_config = {
    'input_dim':47, 
    'encoding_dim':10, 
    'units_list':[30],
    'weight_reg':tfk.regularizers.l1(1e-6),
    'output_activation':tf.nn.sigmoid
}
# Normalizing-flow settings, forwarded to MAF; n_dims matches encoding_dim above.
nf_config = {
    'n_dims':10, 
    'n_layers':5, 
    'units':[32] * 4
}
# Optimizer keyword arguments for the two models.
optimizer_ae = {
    'lr': 0.05
}
optimizer_nf = {
    'lr': 0.005
}

builder.make_ae_model(DenseAutoencoder, ae_config)
# The autoencoder optimizer has to be registered through the AE hook: passing
# optimizer_ae to make_nf_optimizer would simply be overwritten by the NF
# optimizer registered two lines below, and the AE learning rate never used.
builder.make_ae_optimizer(tfk.optimizers.Adam, optimizer_ae)
builder.make_nf_model(MAF, nf_config)
builder.make_nf_optimizer(tfk.optimizers.Adam, optimizer_nf)
builder.compile_ae()
builder.compile_nf()
pae = builder.pae

In [None]:
ae_train ={
    'batch_size':200,
    'epochs':180,
    'sample_weight':1/np.exp(weights2),
    'validation_data':(train["x_valid"],train["x_valid"],1/np.exp(weights2_valid)),
    'callbacks':tfk.callbacks.ReduceLROnPlateau(
        factor=0.2,
        patience=10,
        verbose=1
    )
}

nf_train ={
    'batch_size':200,
    'epochs':100,
    'validation_data':(train["x_valid"],train["x_valid"]),
    'callbacks':tfk.callbacks.ReduceLROnPlateau(
        factor=0.2,
        patience=5,
        verbose=1
    )
}
with tf.device("/device:GPU:0"):
    %time pae.fit(train["x_train"],ae_train,nf_train)

In [None]:
from utils.plotting import loss_plot, latent_space_plot, mjj_cut_plot, \
                           sculpting_plot, roc_plot

# Plot the training history recorded on the PAE object by the fit above.
loss_plot(pae.history)


In [None]:
# Compare encoded training events with an equally sized sample from the flow.
n_latent_samples = train['x_train'].shape[0]

print("Encoding . . .")
z_true = pae.ae.encode(train['x_train'])

print("Sampling . . .")
z_sample = pae.nf.sample(n_latent_samples)

print("Plotting . . .")
latent_space_plot(z_true, z_sample, save_path='plots/latent_space.png')

In [None]:
def optimal_grid(n):
    """Return a near-square ``(rows, cols)`` grid with at least ``n`` cells.

    ``rows`` is ``floor(sqrt(n))`` and ``cols`` is the smallest integer such
    that ``rows * cols >= n``.

    Parameters
    ----------
    n : int
        Number of cells (e.g. subplots) to place; must be at least 1.

    Returns
    -------
    tuple of int
        ``(rows, cols)``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1. Previously this case went through NaN
        arithmetic and ended in an opaque "cannot convert float NaN" error.
    """
    if n < 1:
        raise ValueError(f"optimal_grid needs n >= 1, got {n!r}")
    rows = int(np.floor(np.sqrt(n)))
    # Ceil-division: one extra column whenever rows does not divide n evenly.
    cols = int(-(-n // rows))
    return rows, cols



In [None]:
# Anomaly scores on the training split.
# `mse` is the reconstruction error reported by the PAE.
mse = pae.reco_error(train['x_train'])
# NOTE(review): compute_implicit_sigma is called on the validation split before
# anomaly_score — presumably anomaly_score depends on it; keep this order.
pae.compute_implicit_sigma(train['x_valid'])
# The sign of the PAE anomaly score is flipped before it is used as 'NLL'.
ascore = -pae.anomaly_score(train['x_train'])

# mjj distributions after cutting on each score at the 80th percentile
# (`prc=80`).
mjj_cut_plot(mse, train['mjj_train'], prc=80, score_name='MSE')#, save_path='./plots/mse_cut.png')
mjj_cut_plot(ascore, train['mjj_train'], prc=80, score_name='NLL')


In [None]:
# Scores to compare in the mass-sculpting plot, keyed by their display name.
ano_scores = dict(MSE=mse, NLL=ascore)

sculpting_plot(ano_scores, train['mjj_train'], max_prc=99)#, save_path='./plots/mass_sculpting.png')

In [None]:
# scaler2 = QuantileTransformer(output_distribution='uniform')
# files2 = {
#     'bbox':'../data/BBOX1_bkgHLF_merged.h5'
# }

# test_fractions = {
#     'bbox':1
# }

# loader = LhcoRnDLoader(files, 'all', scaler2)
# loader.preprocessing('bbox')
# test2 = loader.make_test(1_000_000, test_fractions, replace=False)

In [None]:
# Sign-flipped PAE anomaly score on the test split, then the mjj cut plot at
# the 90th percentile of that score (not saved to disk: save_path=None).
ascore_test = -pae.anomaly_score(test['x_test'])
mjj_cut_plot(ascore_test, test['mjj_test'], prc=90, score_name='NLL', bins=100, save_path=None)


In [None]:

# Mass-sculpting plot for the training-split scores, saved to disk this time.
ano_scores = dict(MSE=mse, NLL=ascore)
sculpting_plot(ano_scores, train['mjj_train'], max_prc=99, save_path='./plots/mass_sculpting_bkg.png')

# Cut the test split at the 99th percentile of the sign-flipped anomaly score;
# the plot helper hands back the two mjj samples used for the bump hunt.
ascore_test = -pae.anomaly_score(test['x_test'])
bkg, data = mjj_cut_plot(
    ascore_test,
    test['mjj_test'],
    prc=99,
    score_name='NLL',
    bins=100,
    save_path='./plots/cut_bbox1_samescaler.png',
)

import pyBumpHunter as BH

# One constant weight per background event: the data/background size ratio.
weights = np.full(bkg.shape[0], 1/(bkg.shape[0]/data.shape[0]))

bump_settings = dict(
    rang=(3200,4800),
    width_min=2,
    width_max=6,
    width_step=1,
    scan_step=1,
    Npe=10000,
    Nworker=1,
    seed=666,
    weights=weights,
)
hunter = BH.BumpHunter(**bump_settings)

# Scan, plot and print the result of the bump hunt.
hunter.BumpScan(data,bkg)
hunter.PlotBump(data,bkg,filename='./plots/bump_bbox1.png')
hunter.PrintBumpTrue(data,bkg)