In [None]:
import sys

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# The project packages imported below (and in later cells) live outside the
# standard import path; further down the notebook '../' is appended to sys.path,
# but only after these imports have already executed. Registering it here lets
# a fresh-kernel "Run All" resolve them on the first pass.
# NOTE(review): assumes the notebook is launched from a sub-directory of the
# project root (the data paths also start with '../') -- confirm.
if '../' not in sys.path:
    sys.path.append('../')

from models.autoencoder import DenseAutoencoder
from models.flows import MAF
from models.nn import PaeBuilder

import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras as tfk

In [None]:
from loaders.LHCO import LhcoRnDLoader
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

# Feature scaler handed to the loader; MinMaxScaler() is kept in mind as the
# alternative choice.
scaler = QuantileTransformer(output_distribution='uniform')

# Input files, keyed by sample name.
files = dict(
    bkg='../data/RnD_bkg_HLF.h5',
    sig1='../data/RnD_sig1_HLF.h5',
    sig2='../data/RnD_sig2_HLF.h5',
)

# Per-sample fractions requested for each split: training uses background
# only, the test set is requested as half background / half 'sig1'.
train_fractions = dict(bkg=1)
test_fractions = dict(bkg=0.5, sig1=0.5)

loader = LhcoRnDLoader(files, 'all', scaler)
loader.preprocessing('bkg')
train = loader.make_train_val(250_000, train_fractions, val_split=0.2)
test = loader.make_test(100_000, test_fractions, replace=False)

In [None]:
from utils.plotting import optimal_grid

# Histogram every column of the training matrix, one panel per feature.
n_features = train['x_train'].shape[1]
r, c = optimal_grid(n_features)
fig = plt.figure(figsize=(15,8))
for i in range(n_features):
    plt.subplot(r, c, i+1)
    plt.hist(train['x_train'][:,i], bins=30, density=True, alpha=0.5)
# Solve the layout once, after all panels exist, instead of once per panel.
plt.tight_layout()
plt.show()



In [None]:

# Per-feature histograms of the test matrix, restricted to rows from index
# 25_000 onwards. Reuses the (r, c) grid computed for the training features.
# NOTE(review): 25_000 is hard-coded while other cells split the test set at
# half its length -- confirm which rows this plot is meant to show.
fig = plt.figure(figsize=(15,8))
for i in range(test['x_test'].shape[1]):
    plt.subplot(r, c, i+1)
    plt.hist(test['x_test'][25_000:,i], bins=30, density=True, alpha=0.5)
# Solve the layout once, after all panels exist, instead of once per panel.
plt.tight_layout()
plt.show()


In [None]:
# Second import cell: repeats the framework imports, defines short aliases and
# extends the module search path before importing the project models.
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras as tfk
import numpy as np
import sys

# Short aliases for the TFP / Keras sub-modules.
tfd = tfp.distributions
tfb = tfp.bijectors
tfkl = tfk.layers

# Make the parent directory importable.
# NOTE(review): presumably this is what lets `models` resolve -- confirm.
sys.path.append('../')

from models.autoencoder import DenseAutoencoder
from models.flows import MAF
# Last expression of the cell: the list of physical devices is shown as output.
tf.config.list_physical_devices()

In [None]:
from tensorflow.python.client import device_lib

# Print the devices reported by TensorFlow's device_lib.
local_devices = device_lib.list_local_devices()
print(local_devices)


In [None]:

builder = PaeBuilder()

# Autoencoder configuration passed to DenseAutoencoder via the builder.
ae_config = {
    'input_dim':47,
    'encoding_dim':10,
    'units_list':[30, 20],
    'weight_reg':tfk.regularizers.l1(1e-6),
    'output_activation':tf.nn.sigmoid
}
# Flow configuration passed to MAF; its dimensionality is tied to the
# autoencoder's encoding size so the two cannot drift apart.
nf_config = {
    'n_dims':ae_config['encoding_dim'],
    'n_layers':5,
    'units':[32]*4
}
# Keyword arguments for the two Adam optimizers.
optimizer_ae = {
    'lr': 0.05
}
optimizer_nf = {
    'lr': 0.005
}

builder.make_ae_model(DenseAutoencoder, ae_config)
# The autoencoder settings were previously handed to make_nf_optimizer and then
# overwritten by the flow's optimizer two lines later, so they never took effect.
# NOTE(review): assumes PaeBuilder exposes make_ae_optimizer alongside
# make_ae_model / compile_ae -- confirm against models.nn.
builder.make_ae_optimizer(tfk.optimizers.Adam, optimizer_ae)
builder.make_nf_model(MAF, nf_config)
builder.make_nf_optimizer(tfk.optimizers.Adam, optimizer_nf)
builder.compile_ae()
builder.compile_nf()
pae = builder.pae

In [None]:
# Both training stages are given the same (x_valid, x_valid) validation pair.
validation_pair = (train["x_valid"], train["x_valid"])

# Fit options for the autoencoder stage.
ae_train = dict(
    batch_size=200,
    epochs=70,
    validation_data=validation_pair,
    callbacks=tfk.callbacks.ReduceLROnPlateau(factor=0.2, patience=5, verbose=1),
)

# Fit options for the flow stage; each stage gets its own callback instance.
nf_train = dict(
    batch_size=200,
    epochs=100,
    validation_data=validation_pair,
    callbacks=tfk.callbacks.ReduceLROnPlateau(factor=0.2, patience=5, verbose=1),
)

pae.fit(train["x_train"], ae_train, nf_train)

In [None]:
from utils.plotting import loss_plot, latent_space_plot, mjj_cut_plot, \
                           sculpting_plot

# Plot the history object stored on the PAE.
training_history = pae.history
loss_plot(training_history)


In [None]:
# Latent-space plot for the training inputs, written to the path below.
latent_plot_path = 'plots/latent_space.png'
latent_space_plot(pae, train['x_train'], save_path=latent_plot_path)

In [None]:
def optimal_grid(n):
    """Return a (rows, cols) subplot grid with at least ``n`` cells.

    ``rows`` is floor(sqrt(n)) and ``cols`` is the smallest count such that
    ``rows * cols >= n``.

    Note: defining this in the notebook rebinds the ``optimal_grid`` name that
    an earlier cell imports from ``utils.plotting``.

    Parameters
    ----------
    n : int
        Number of panels to place; must be at least 1.

    Returns
    -------
    tuple of (int, int)
        Number of rows and columns.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 (previously this surfaced as an opaque
        "cannot convert float NaN to integer" after a divide-by-zero warning).
    """
    if n < 1:
        raise ValueError(f"optimal_grid needs a positive number of panels, got {n}")
    rows = int(np.floor(np.sqrt(n)))
    # Ceiling division: one extra column whenever rows does not divide n.
    cols = int(np.ceil(n / rows))
    return rows, cols



In [None]:
from scipy.stats import entropy
from scipy.spatial.distance import jensenshannon

# Reconstruction error of the training inputs, plotted against the
# 'excl_train' column held out by the loader.
mse = pae.reco_error(train['x_train'])
excl_train = train['excl_train']

mjj_cut_plot(
    mse,
    excl_train,
    prc=80,
    score_name='MSE',
    save_path='./plots/mse_cut.png',
)
sculpting_plot(
    mse,
    excl_train,
    max_prc=99,
    save_path='./plots/mse_sculpting.png',
)


In [None]:
# Estimate sigma on the validation inputs first, then score the training inputs.
pae.compute_implicit_sigma(train['x_valid'])
ascore = pae.anomaly_score(train['x_train'])

# Both plots use the negated score against the 'excl_train' column.
negated_ascore = -ascore
excl_train = train['excl_train']
mjj_cut_plot(negated_ascore, excl_train, prc=80, score_name='ln p(x)')
sculpting_plot(negated_ascore, excl_train, max_prc=99)

In [None]:
# loader._scaled_data['sig1']

# from utils.plotting import optimal_grid

# r, c = optimal_grid(loader._scaled_data['sig1'].shape[1])
# fig = plt.figure(figsize=(15,8))
# for i in range(loader._scaled_data['sig1'].shape[1]):
#     plt.subplot(r, c, i+1)
#     n1, b, _ = plt.hist(loader._scaled_data['sig1'][:,i], bins=30, density=True, alpha=0.5)
#     plt.tight_layout()
# plt.show()

In [None]:
# Shape of the test label array.
label_shape = test['labels'].shape
print(label_shape)
def binarize(label):
    """Map a sample label to a binary target: 1 for 'sig1', 0 for anything else."""
    if label == 'sig1':
        return 1
    return 0
# Binary targets of the first and last test rows, then of the whole label array.
first_label, last_label = test['labels'][0], test['labels'][-1]
print(binarize(first_label), binarize(last_label))
print( np.array([binarize(lbl) for lbl in test['labels']]))

In [None]:
from sklearn.metrics import roc_curve, auc

# Two scores per test row: the reconstruction error and the negated anomaly score.
mse_test = pae.reco_error(test['x_test'])
ascore_test = -pae.anomaly_score(test['x_test'])

# Binary targets derived from the test labels (1 for 'sig1', 0 otherwise).
targets = np.array([binarize(lbl) for lbl in test['labels']])
print(targets)

scores = ascore_test
scores_mse = mse_test

# One ROC curve per score on a shared figure; after the loop fpr / tpr /
# roc_auc / label hold the values of the MSE curve.
fig = plt.figure(figsize=(8,8))
for curve_prefix, curve_color, curve_scores in (
    ('NLL ', 'red', scores),
    ('MSE ', 'orange', scores_mse),
):
    fpr, tpr, _ = roc_curve(targets, curve_scores)
    roc_auc = auc(fpr, tpr)
    label = f'ROC curve (AUC = {roc_auc:.2f})'
    plt.plot(fpr, tpr, color=curve_color, label=curve_prefix+label)

plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title("ROC Curve")
# Diagonal reference line.
plt.plot([0, 1], [0, 1], linestyle='--', color=(0.6, 0.6, 0.6), label='Random guess')
plt.legend(loc="best")
plt.tight_layout()
plt.grid()

In [None]:
# Midpoint index of the test score array, used to split it into two halves.
n_test_scores = ascore_test.shape[0]
mid = n_test_scores//2
print(pae.sigma_square)

In [None]:
# For each quantity, overlay the histogram of the first half of the test rows
# with that of the second half, reusing the bin edges of the first.
for values in (ascore_test, test['excl_test']):
    plt.figure(figsize=(12,8))
    n,b,p = plt.hist(values[:mid], bins=60, alpha=.5)
    plt.hist(values[mid:], bins=b, alpha=.5)
    plt.show()


In [None]:
# Hand-assembled score on the test inputs, built from three pieces:
# the sigma-weighted squared reconstruction error, the squared norm of the
# flow-inverted latent code, and the flow's inverse log-det-Jacobian.
x_test = test['x_test']
reco_error = np.square(pae.ae(x_test)-x_test)
z = pae._ae.encode(x_test)
byz = pae._nf.inverse(z)
detJ = pae._nf.inverse_log_det_jacobian(z)

latent_norm = np.linalg.norm(byz,axis=1)
reco_term = -0.5*np.dot(reco_error,pae.sigma_square**(-1))
latent_term = -0.5*latent_norm**2
as2 = reco_term+latent_term+detJ

# The plots take the negated score against the 'excl_test' column.
excl_test = test['excl_test']
mjj_cut_plot(-as2, excl_test, prc=80, score_name='-ln p(x)')
sculpting_plot(-as2, excl_test, max_prc=99)

print(latent_norm)


In [None]:
# Encode the validation inputs and push the codes through the flow's inverse.
x = train['x_valid']
z = pae.ae.encode(x)
byz = pae._nf.inverse(z)
print(byz.shape)

# Overlay column 2 after (byz) and before (z) the flow inverse.
hist_style = dict(bins=30, alpha=0.5)
plt.hist(byz[:,2], **hist_style)
plt.hist(z[:,2], **hist_style)

In [None]:
# Overlay the score histograms of the two halves of the test set. The split
# point is derived from the array length (as the ROC target construction and
# the other half-split plots do) rather than a fixed row count, so it stays
# correct when the test-set size changes.
half = as2.shape[0]//2
plt.figure(figsize=(12,8))
n,b,p = plt.hist(as2[:half], bins=60, alpha=.5)
# Reuse the first histogram's bin edges so both halves are directly comparable.
plt.hist(as2[half:], bins=b, alpha=.5)
plt.show()

In [None]:
def roc_plot(fp, vp, roc_auc, save_path: str = None):
    """Draw a single ROC curve with a diagonal reference line and show it.

    Parameters
    ----------
    fp, vp
        Values plotted on the 'FPR' and 'TPR' axes respectively.
    roc_auc
        Number rendered with two decimals in the legend entry.
    save_path
        When not None, the figure is saved to this path before being shown.
    """
    plt.figure(figsize=(8,8))

    legend_text = f'ROC curve (AUC = {roc_auc:.2f})'
    plt.plot(fp, vp, color='red', label=legend_text)
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], linestyle='--', color=(0.6, 0.6, 0.6), label='Random guess')

    plt.title("ROC Curve")
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(loc="best")
    plt.tight_layout()
    plt.grid()

    if save_path is not None:
        plt.savefig(save_path)
    plt.show()

# ROC of the negated hand-assembled score.
scores = -as2
# as2 is computed row-by-row from test['x_test'], so the matching targets come
# from test['labels'] (1 for 'sig1', 0 otherwise). This replaces the
# "first half 0, second half 1" construction, which assumed a particular row
# order and produced a length mismatch for an odd number of rows.
targets = np.array([1 if lbl == 'sig1' else 0 for lbl in test['labels']])
fpr, tpr, _ = roc_curve(targets, scores)
roc_auc = auc(fpr, tpr)
roc_plot(fpr, tpr, roc_auc)

In [None]:
# Bare expression: displays the current value of `scores` as the cell output.
scores