In [None]:
import numpy as np
import matplotlib.pyplot as plt
import math
import h5py
import tensorflow as tf
import joblib
#from losses import threeD_loss, split_3D_loss, mse_split, mse_split_particles_loss, kl_loss
#tf.compat.v1.enable_eager_execution()
#import mplhep as hep
#plt.style.use(hep.style.CMS)

### Load results

In [None]:
input_dir = 'AE_result_pruned.h5'
data = h5py.File(input_dir, 'r')

In [None]:
bsm_labels = ['Leptoquark','A to 4 leptons', 'hChToTauNu', 'hToTauTau']

In [None]:
# QCD
X_test_scaled = data['QCD'][:]
X_test = data['QCD_input'][:]
qcd_prediction = data['predicted_QCD'][:]


#BSM
bsm_prediction=[]; bsm_target = []; bsm_prediction_board=[]; bsm_data=[];bsm_prediction_onnx=[]
for bsm in bsm_labels:
    bsm_data.append(data[bsm+'_input'][:])
    #print(data[bsm+'_input'][:].shape)
    bsm_target.append(data[bsm+'_scaled'][:])
    bsm_prediction.append(data['predicted_'+bsm][:])


In [None]:
# loss = data['loss'][:]
# val_loss = data['val_loss'][:]

In [None]:
data.close()

### Plot training/validation loss

In [None]:
plt.figure(figsize=(10,6))
plt.plot(loss[:], label='Training loss')
plt.plot(val_loss[:], label='Validation loss')
plt.title('AE - Training and validation loss')
#plt.yscale('log', nonposy='clip')
plt.legend(loc='best')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

### Plot features Test vs Prediction - QCD

In [None]:
def make_feature_plots(true, prediction, xlabel, particle, bins, density, ranges=None):
    print(find_min_max_range(true, prediction))
    plt.figure(figsize=(7,5))
    if ranges == None: ranges = find_min_max_range(true, prediction) 
    plt.hist(prediction, bins=bins, histtype='step', density=density, range = ranges)
    plt.hist(true, bins=bins, histtype='step', density=density, range = ranges)
    plt.yscale('log', nonpositive='clip')
    plt.ylabel('Prob. Density(a.u.)')
    plt.xlabel(xlabel)
    plt.tight_layout()
    plt.legend([particle+' Predicted', particle+' True'])
    plt.show()

In [None]:
def make_delta_feature_plots(true, prediction, xlabel, particle, bins, density, ranges=None, phi=False):
    plt.figure(figsize=(7,5))
    if phi:
        delta = (true - prediction)/true
        xlabel = xlabel+' pull'
    else: 
        delta = (true - prediction)/true
        xlabel = xlabel+' pull'
    plt.hist(delta, bins=bins, histtype='step', density=density, range=ranges, label=particle)
    plt.axvline(delta.mean(), color='k', linestyle='dashed', linewidth=1, label='mean = '+str(round(delta.mean(),2)))
    plt.legend(loc='upper right')
    plt.yscale('log', nonpositive='clip')
    plt.ylabel('Prob. Density(a.u.)')
    plt.xlabel(xlabel)
    plt.tight_layout()
    plt.annotate('RMS =  %.2f' % np.sqrt(np.mean(delta**2)), xy=(0, 1), xytext=(12, -12), va='top',\
            xycoords='axes fraction', textcoords='offset points')
    #plt.show()
    
    if 'pT' in xlabel: xlabel = 'pT'
    elif 'phi' in xlabel: xlabel = 'phi'
    elif 'eta' in xlabel: xlabel = 'eta'
    
    plt.savefig(f'plots/{particle}_{xlabel}_zscore.pdf', facecolor='white')

In [None]:
def find_min_max_range(true, pred):
    minRange = min(true)
    minPred = min(pred)
    if minPred < minRange: minRange = minPred
        
    maxRange = max(true)
    maxPred = max(pred)
    if maxPred > maxRange: maxRange = maxPred
        
    return (minRange, maxRange)

In [None]:
#mask_met = X_test[:,0:1]!=0
mask_met_delete = np.where(X_test[:,0:1].reshape(X_test.shape[0]*1)==0)[0]
#mask_eg = X_test[:,1:5]!=0
mask_eg_delete = np.where(X_test[:,1:5].reshape(X_test.shape[0]*4)==0)[0]
#mask_muon = X_test[:,5:9]!=0
mask_muon_delete = np.where(X_test[:,5:9].reshape(X_test.shape[0]*4)==0)[0]
#mask_jet = X_test[:,9:19]!=0
mask_jet_delete = np.where(X_test[:,9:19].reshape(X_test.shape[0]*10)==0)[0]

In [None]:
# reshape Test and Prediction datasets
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 19, 3, 1)
qcd_pred_reshaped = qcd_prediction.reshape(qcd_prediction.shape[0], 19, 3, 1)

In [None]:
# MET
make_feature_plots(np.delete(X_test_reshaped[:,0:1,0].reshape(X_test.shape[0]*1),mask_met_delete),\
                   np.delete(qcd_prediction[:,0:1,0].reshape(qcd_prediction.shape[0]*1),mask_met_delete),\
                   'pT', 'MET', 100, True)
make_feature_plots(np.delete(X_test_reshaped[:,0:1,2].reshape(X_test_scaled.shape[0]*1),mask_met_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,0:1,2].reshape(qcd_prediction.shape[0]*1)))),mask_met_delete),\
                   '$\phi$', 'MET', 100, True)
# Jets
make_feature_plots(np.delete(X_test_reshaped[:,9:19,0].reshape(X_test.shape[0]*10),mask_jet_delete),\
                   np.delete(qcd_prediction[:, 9:19,0].reshape(qcd_prediction.shape[0]*10),mask_jet_delete),\
                   'pT', 'Jets', 100, True, ranges=(0,1000))
make_feature_plots(np.delete(X_test_reshaped[:,9:19,1].reshape(X_test.shape[0]*10),mask_jet_delete),\
                np.delete(tf.make_ndarray(tf.make_tensor_proto(4.0*tf.math.tanh(qcd_prediction[:,9:19,1].reshape(qcd_prediction.shape[0]*10)))),mask_jet_delete),\
                   '$\eta$', 'Jets', 100, True)
make_feature_plots(np.delete(X_test_reshaped[:,9:19,2].reshape(X_test.shape[0]*10),mask_jet_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,9:19,2].reshape(qcd_prediction.shape[0]*10)))),mask_jet_delete),\
                   '$\phi$', 'Jets', 100, True) # wrap phi
# Muons
make_feature_plots(np.delete(X_test_reshaped[:,5:9,0].reshape(X_test.shape[0]*4),mask_muon_delete),\
                   np.delete(qcd_prediction[:,5:9,0].reshape(qcd_prediction.shape[0]*4),mask_muon_delete),\
                    'pT', 'Muons', 100, True)
make_feature_plots(np.delete(X_test_reshaped[:,5:9,1].reshape(X_test.shape[0]*4),mask_muon_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(2.1*tf.math.tanh(qcd_prediction[:,5:9,1].reshape(qcd_prediction.shape[0]*4)))),mask_muon_delete),\
                   '$\eta$', 'Muons', 100, True)
make_feature_plots(np.delete(X_test_reshaped[:,5:9,2].reshape(X_test.shape[0]*4),mask_muon_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,5:9,2].reshape(qcd_prediction.shape[0]*4)))),mask_muon_delete),\
                   '$\phi$', 'Muons', 100, True)
#EGammas
make_feature_plots(np.delete(X_test_reshaped[:,1:5,0].reshape(X_test.shape[0]*4),mask_eg_delete),\
                   np.delete(qcd_prediction[:,1:5,0].reshape(qcd_prediction.shape[0]*4),mask_eg_delete),\
                   'pT', 'EGammas', 100, True, ranges = (0.75937235, 500))
make_feature_plots(np.delete(X_test_reshaped[:,1:5,1].reshape(X_test.shape[0]*4),mask_eg_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(3.0*tf.math.tanh(qcd_prediction[:,1:5,1].reshape(qcd_prediction.shape[0]*4)))),mask_eg_delete),\
                   '$\eta$', 'EGammas', 100, True)
make_feature_plots(np.delete(X_test_reshaped[:,1:5,2].reshape(X_test.shape[0]*4),mask_eg_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,1:5,2].reshape(qcd_prediction.shape[0]*4)))),mask_eg_delete),\
                   '$\phi$', 'EGammas', 100, True)

### Plot features (Test - Prediction) - QCD

In [1]:
# MET
make_delta_feature_plots(np.delete(X_test_reshaped[:,0:1,0].reshape(X_test.shape[0]*1),mask_met_delete),\
                   np.delete(qcd_prediction[:,0:1,0].reshape(qcd_prediction.shape[0]*1),mask_met_delete),\
                   'pT', 'MET', 200, True, ranges=(-1000, 1000))
make_delta_feature_plots(np.delete(X_test_reshaped[:,0:1,2].reshape(X_test.shape[0]*1),mask_met_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,0:1,2].reshape(qcd_prediction.shape[0]*1)))),mask_met_delete),\
                   '$\phi$', 'MET', 200, True, phi=True, ranges=(-200, 200)) # wrap phi
# Jets
make_delta_feature_plots(np.delete(X_test_reshaped[:,9:19,0].reshape(X_test.shape[0]*10),mask_jet_delete),\
                   np.delete(qcd_prediction[:, 9:19,0].reshape(qcd_prediction.shape[0]*10),mask_jet_delete),\
                   'pT', 'Jets', 200, True, ranges=(-10000, 10000))
make_delta_feature_plots(np.delete(X_test_reshaped[:,9:19,1].reshape(X_test.shape[0]*10),mask_jet_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(4.0*tf.math.tanh(qcd_prediction[:,9:19,1].reshape(qcd_prediction.shape[0]*10)))),mask_jet_delete),\
                   '$\eta$', 'Jets', 200, True,phi=True, ranges=(-250,250))
make_delta_feature_plots(np.delete(X_test_reshaped[:,9:19,2].reshape(X_test.shape[0]*10),mask_jet_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,9:19,2].reshape(qcd_prediction.shape[0]*10)))),mask_jet_delete),\
                   '$\phi$', 'Jets', 200, True, phi=True, ranges=(-250, 250)) # wrap phi
# Muons
make_delta_feature_plots(np.delete(X_test_reshaped[:,5:9,0].reshape(X_test.shape[0]*4),mask_muon_delete),\
                   np.delete(qcd_prediction[:,5:9,0].reshape(qcd_prediction.shape[0]*4),mask_muon_delete),\
                    'pT', 'Muons', 200, True, ranges=(-1000,1000))
make_delta_feature_plots(np.delete(X_test_reshaped[:,5:9,1].reshape(X_test.shape[0]*4),mask_muon_delete),\
                 np.delete(tf.make_ndarray(tf.make_tensor_proto(2.1*tf.math.tanh(qcd_prediction[:,5:9,1].reshape(qcd_prediction.shape[0]*4)))),mask_muon_delete),\
                   '$\eta$', 'Muons', 200, True, phi=True, ranges=(-100, 100))
make_delta_feature_plots(np.delete(X_test_reshaped[:,5:9,2].reshape(X_test.shape[0]*4),mask_muon_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,5:9,2].reshape(qcd_prediction.shape[0]*4)))),mask_muon_delete),\
                  '$\phi$', 'Muons', 200, True, phi=True, ranges=(-100, 100))
#EGammas
make_delta_feature_plots(np.delete(X_test_reshaped[:,1:5,0].reshape(X_test.shape[0]*4),mask_eg_delete),\
                   np.delete(qcd_prediction[:,1:5,0].reshape(qcd_prediction.shape[0]*4),mask_eg_delete),\
                   'pT', 'EGammas', 200, True, ranges=(-1000, 1000))
make_delta_feature_plots(np.delete(X_test_reshaped[:,1:5,1].reshape(X_test.shape[0]*4),mask_eg_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(3.0*tf.math.tanh(qcd_prediction[:,1:5,1].reshape(qcd_prediction.shape[0]*4)))),mask_eg_delete),\
                   '$\eta$', 'EGammas', 200, True, phi=True, ranges=(-100, 100))
make_delta_feature_plots(np.delete(X_test_reshaped[:,1:5,2].reshape(X_test.shape[0]*4),mask_eg_delete),\
                   np.delete(tf.make_ndarray(tf.make_tensor_proto(math.pi*tf.math.tanh(qcd_prediction[:,1:5,2].reshape(qcd_prediction.shape[0]*4)))),mask_eg_delete),\
                   '$\phi$', 'EGammas', 200, True, phi=True, ranges=(-100, 100))

NameError: name 'make_delta_feature_plots' is not defined

### Calculate loss for QCD and BSM data

In [None]:
from function import make_mse_loss_numpy

In [None]:
def return_total_loss(loss, X, qcd_pred, bsm_t, bsm_pred):
    
    total_loss = []
    total_loss.append(loss(X, qcd_pred.astype(np.float32)))
    for i, bsm_i in enumerate(bsm_t):
        total_loss.append(loss(bsm_i, bsm_pred[i].astype(np.float32)))
    return total_loss

In [None]:
# define loss used
loss = make_mse_loss_numpy

In [None]:
total_loss = return_total_loss(loss, X_test_scaled, qcd_prediction, bsm_target, bsm_prediction)

### Plot Loss Distributions

In [None]:
labels = ['QCD multijet', 'Leptoquark', 'A to 4 leptons', 'hChToTauNu', 'hToTauTau']

In [None]:
minScore = 999999.
maxScore = 0
for i in range(len(labels)):
    thisMin = np.min(total_loss[i])
    thisMax = np.max(total_loss[i])
    minScore = min(thisMin, minScore)
    maxScore = max(maxScore, thisMax)

In [None]:
bin_size=100
plt.figure(figsize=(10,8))
for i, label in enumerate(labels):
    print(len(total_loss[i]))
    plt.hist(total_loss[i], bins=bin_size, label=label, density = True,
         histtype='step', fill=False, linewidth=1.5, range=(minScore, 10000))
plt.yscale('log')
#plt.xscale('log')
#plt.title('MSE')
plt.xlabel("Autoencoder Loss")
plt.ylabel("Probability (a.u.)")
plt.grid(True)
plt.title('MSE split loss')
plt.legend(loc='best')
plt.show()

## Plot ROC curves

In [None]:
colors = ['C0','C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']

In [None]:
leptoquark_results=[]; ato4l_results=[]; ch_results=[]; to_results=[]

In [None]:
labels_legend = [r'LQ $\rightarrow$ b$\tau$', r'A $\rightarrow$ 4L', r'$h_{\pm} \rightarrow \tau\nu$', r'$h_{0} \rightarrow \tau\tau$']

In [None]:
from sklearn.metrics import roc_curve, auc

target_qcd = np.zeros(total_loss[0].shape[0])

plt.figure(figsize=(10,8))
for i, label in enumerate(labels):
    if i == 0: continue
    
    trueVal = np.concatenate((np.ones(total_loss[i].shape[0]), target_qcd))
    predVal_loss = np.concatenate((total_loss[i], total_loss[0]))

    fpr_loss, tpr_loss, threshold_loss = roc_curve(trueVal, predVal_loss)

    auc_loss = auc(fpr_loss, tpr_loss)
    plt.plot(fpr_loss, tpr_loss, "-", label='%s (auc = %.1f%%)'%('keras '+ labels_legend[i-1],auc_loss*100.), linewidth=1.5, color=colors[i])
    if i == 1: 
        leptoquark_results = [fpr_loss, tpr_loss, auc_loss]
        #plt.plot(lepto_results[0][0], lepto_results[0][1], "-", label='%s (auc = %.1f%%)'%('hls '+labels_legend[i-1],0.891*100.), linewidth=1.5, color=colors[i], alpha=0.6)
    elif i == 2: 
        ato4l_results = [fpr_loss, tpr_loss, auc_loss]
        #plt.plot(ato4l_results[0][0], ato4l_results[0][1], "-",label='%s (auc = %.1f%%)'%('hls '+labels_legend[i-1],0.885*100.),  linewidth=1.5, color=colors[i], alpha=0.6)
    elif i == 3: 
        ch_results = [fpr_loss, tpr_loss, auc_loss]
        #plt.plot(ch_results[0][0], ch_results[0][1], "-", label='%s (auc = %.1f%%)'%('hls '+labels_legend[i-1],0.714*100.), linewidth=1.5, color=colors[i], alpha=0.6)
    else: 
        to_results = [fpr_loss, tpr_loss, auc_loss]
        #plt.plot(to_results[0][0], to_results[0][1], "-", label='%s (auc = %.1f%%)'%('hls '+labels_legend[i-1],0.585*100.), linewidth=1.5, color=colors[i], alpha=0.6)
    plt.semilogx()
    plt.semilogy()
    plt.ylabel("True Positive Rate")
    plt.xlabel("False Positive Rate")
    plt.grid(True)
    plt.legend(loc='center right')
    plt.tight_layout()
plt.plot(np.linspace(0, 1),np.linspace(0, 1), '--', color='0.75')
plt.axvline(0.00001, color='red', linestyle='dashed', linewidth=1)
plt.title("ROC AE")
#plt.savefig('AE_binary_ROCs.pdf')
plt.show()


In [None]:
# np.savetxt('AE_leptoquark_Results_HLS', leptoquark_results[:-1], delimiter=',')
# np.savetxt('AE_ato4l_Results_HLS', ato4l_results[:-1], delimiter=',')

In [None]:
# np.savetxt('AE_CH_Results_HLS', ch_results[:-1], delimiter=',')
# np.savetxt('AE_TO_Results_HLS', to_results[:-1], delimiter=',')