# Convolutional Autoencoder Training for Anomaly Detection @ L1Trigger

# Packages

In [None]:
import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.metrics import roc_curve, auc
import tensorflow_probability as tfp

import keras_tuner
from keras_tuner import Hyperband

import joblib

# Input files reading

All input files are already sorted in Calo regions (i, j) ~ (18, 14)<br>
Where i = 0 -> 17 corresponds to GCT_Phi = 0 -> 17<br>
Where j = 0 -> 13 corresponds to RCT_Eta = 4 -> 17

Keep this ordering as is when feeding into neural nets. Also keep this in mind when generating/preparing new samples.

Zerobias and MC signal files:

In [None]:
# Read the three ZeroBias files one at a time. The context manager closes each
# HDF5 handle as soon as its 'CaloRegions' dataset has been copied into memory
# (`[()]` reads the whole dataset into a NumPy array).
zb_parts = []
for zb_path in ('bkg/ZeroBias_0.h5', 'bkg/ZeroBias_1.h5', 'bkg/ZeroBias_2.h5'):
    with h5py.File(zb_path, 'r') as zb_file:
        zb_parts.append(zb_file['CaloRegions'][()])
ZeroBias = np.concatenate(zb_parts)
# One single-channel (18, 14) image per event.
ZeroBias = ZeroBias.astype(dtype = 'float32').reshape(-1, 18, 14, 1)
print('ZeroBias shape: ' + str(ZeroBias.shape))

# Paths of the MC samples to load, in index order: entries 0-2 are background
# samples, everything after that is signal. Disabled samples are kept as
# comments so they can be switched back on. The `# i=N` markers give the index
# an entry would have if every signal entry were enabled (with only the `_0`
# background files active).
MC_files = [
    'bkg/110X/QCD_0.h5',  # i=0
    #'bkg/110X/QCD_1.h5',
    #'bkg/110X/QCD_2.h5',
    'bkg/120X/SingleNeutrino_E-10_0.h5',  # i=1
    #'bkg/120X/SingleNeutrino_E-10_1.h5',
    #'bkg/120X/SingleNeutrino_E-10_2.h5',
    'bkg/120X/SingleNeutrino_Pt-2To20_0.h5',  # i=2
    #'bkg/120X/SingleNeutrino_Pt-2To20_1.h5',
    #'bkg/120X/SingleNeutrino_Pt-2To20_2.h5',

    'sig/110X/GluGluToHHTo4B_node_SM_TuneCP5_14TeV.h5',  # i=3
    #'sig/110X/HTo2LongLivedTo4mu_MH-1000_MFF-450_CTau-10000mm_TuneCP5_14TeV.h5',
    #'sig/110X/HTo2LongLivedTo4mu_MH-125_MFF-12_CTau-900mm_TuneCP5_14TeV.h5',
    #'sig/110X/HTo2LongLivedTo4mu_MH-125_MFF-25_CTau-1500mm_TuneCP5_14TeV.h5',
    #'sig/110X/HTo2LongLivedTo4mu_MH-125_MFF-50_CTau-3000mm_TuneCP5_14TeV.h5',
    #'sig/110X/VBFHToTauTau_M125_TuneCUETP8M1_14TeV.h5',
    #'sig/110X/VBF_HH_CV_1_C2V_1_C3_1_TuneCP5_PSweights_14TeV.h5',
    #'sig/110X/VBF_HToInvisible_M125_TuneCUETP8M1_14TeV.h5',
    #'sig/110X/VectorZPrimeToQQ_M100_pT300_TuneCP5_14TeV.h5',
    #'sig/110X/VectorZPrimeToQQ_M200_pT300_TuneCP5_14TeV.h5',
    #'sig/110X/VectorZPrimeToQQ_M50_pT300_TuneCP5_14TeV.h5',  # i=13
    #'sig/110X/ZprimeToZH_MZprime1000_MZ50_MH80_ZTouds_HTouds_narrow_TuneCP5_14TeV.h5',
    #'sig/110X/ZprimeToZH_MZprime600_MZ50_MH80_ZTouds_HTouds_narrow_TuneCP5_14TeV.h5',
    #'sig/110X/ZprimeToZH_MZprime800_MZ50_MH80_ZTouds_HTouds_narrow_TuneCP5_14TeV.h5',
    'sig/120X/GluGluHToTauTau_M-125_TuneCP5_14TeV.h5',
    #'sig/120X/GluGluToHHTo4B_node_cHHH1_TuneCP5_14TeV.h5',
    #'sig/120X/GluGluToHHTo4B_node_cHHH5_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-1000_MFF-450_CTau-100000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-1000_MFF-450_CTau-10000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-125_MFF-12_CTau-9000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-125_MFF-12_CTau-900mm_TuneCP5_14TeV.h5',  # i=23
    #'sig/120X/HTo2LongLivedTo4b_MH-125_MFF-25_CTau-15000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-125_MFF-25_CTau-1500mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-125_MFF-50_CTau-30000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-125_MFF-50_CTau-3000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-250_MFF-120_CTau-10000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-250_MFF-120_CTau-1000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-250_MFF-60_CTau-1000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-350_MFF-160_CTau-10000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-350_MFF-160_CTau-1000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-350_MFF-160_CTau-500mm_TuneCP5_14TeV.h5',  # i=33
    'sig/120X/HTo2LongLivedTo4b_MH-350_MFF-80_CTau-10000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-350_MFF-80_CTau-1000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4b_MH-350_MFF-80_CTau-500mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4mu_MH-1000_MFF-450_CTau-10000mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4mu_MH-125_MFF-12_CTau-900mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4mu_MH-125_MFF-25_CTau-1500mm_TuneCP5_14TeV.h5',
    #'sig/120X/HTo2LongLivedTo4mu_MH-125_MFF-50_CTau-3000mm_TuneCP5_14TeV.h5',
    #'sig/120X/SUSYGluGluToBBHToBB_NarrowWidth_M-1200_TuneCP5_13TeV-pythia814TeV.h5',
    #'sig/120X/SUSYGluGluToBBHToBB_NarrowWidth_M-120_TuneCP5_14TeV.h5',
    #'sig/120X/SUSYGluGluToBBHToBB_NarrowWidth_M-350_TuneCP5_14TeV.h5',  # i=43
    #'sig/120X/SUSYGluGluToBBHToBB_NarrowWidth_M-600_TuneCP5_14TeV.h5',
    #'sig/120X/TprimeBToTH_M-650_LH_TuneCP5_14TeV.h5',
    #'sig/120X/VBFHHTo4B_CV_1_C2V_2_C3_1_TuneCP5_14TeV.h5',
    #'sig/120X/VBFHToInvisible_M125_TuneCP5_14TeV.h5',
    'sig/120X/VBFHToTauTau_M125_TuneCP5_14TeV.h5',
    #'sig/120X/VectorZPrimeGammaToQQGamma_M-10_GPt-75_TuneCP5_14TeV.h5',
    #'sig/120X/VectorZPrimeToQQ_M-100_Pt-300_TuneCP5_14TeV.h5',
    #'sig/120X/VectorZPrimeToQQ_M-200_Pt-300_TuneCP5_14TeV.h5',  # i=51
]
# Load the calo regions of every MC file and, for the signal samples
# (index > 2), the per-event acceptance flag. Each file is opened exactly once
# and closed by the context manager before moving on.
MC = []
AcceptanceFlag = []
for i, mc_path in enumerate(MC_files):
    with h5py.File(mc_path, 'r') as mc_file:
        regions = mc_file['CaloRegions'][()].astype(dtype = 'float32')
        #Read acceptance flag in MC signals
        flags = mc_file['AcceptanceFlag'][()] if i > 2 else None
    MC.append(regions.reshape(-1, 18, 14, 1))
    if flags is None:
        # Background samples are not read from the file: flag every event as accepted.
        flags = np.ones((MC[i].shape[0]))
    AcceptanceFlag.append(flags)

# Keep at most the first 10000 events of each background sample.
MC[0] = MC[0][:10000,:,:,:]#QCD
MC[1] = MC[1][:10000,:,:,:]#SingleNu_E10
MC[2] = MC[2][:10000,:,:,:]#SingleNu_Pt2To20

# Disabled cell content: the string literal below is never executed. It holds
# code that builds an extra signal sample (`vbf`) from a CSV file and appends
# it to MC_files / MC; it would need to be turned back into code to take effect.
'''
#/nfs_scratch/dasu/2022-02-04/L1TSignalZerobiasMixer/cms-vbfh.csv
vbf = pd.read_csv('cms-vbfh.csv')
vbf.columns = ['eta','phi','et','pos','ebit','tbit']
vbf = vbf[251:]

event_col = []
for i in range(round(vbf.shape[0]/252)):
    for j in range(252):
        event_col.append(i)
        
vbf['event'] = event_col
vbf = vbf.drop(['pos','ebit','tbit'],axis=1)
vbf = vbf.sort_values(by=['event', 'phi', 'eta'], ascending = [True, True, True])
vbf = vbf.reindex(columns=['event','phi','eta','et'])
vbf = vbf.drop(['event'],axis=1)
vbf = vbf.to_numpy()
vbf = vbf.reshape((-1,18,14,3))
vbf = vbf[:,:,:,2]
vbf = vbf.reshape((-1,18,14,1))
vbf.shape
MC_files.append('/nfs_scratch/dasu/2022-02-04/L1TSignalZerobiasMixer/cms-vbfh.csv')
MC.append(vbf)
'''

#Throw away MC signal events that failed to pass the acceptance cuts
# One boolean mask per sample, built with a single vectorized comparison
# instead of a Python loop over events. Only the first MC[i].shape[0] flags are
# used, because the background samples were truncated after their flags were made.
acceptance_filter = []
for i in range(len(MC_files)):
    n_events = MC[i].shape[0]
    accepted = np.ravel(AcceptanceFlag[i])[:n_events] == 1
    acceptance_filter.append(accepted)
    MC[i] = MC[i][accepted,:,:,:]
    # The printed acceptance is the mean over ALL flags of the sample.
    print('i = ' + str(i) + ': ' + str(MC[i].shape) + '; accepted ' + str(np.round(np.mean(AcceptanceFlag[i]), 4)))

Throw away events with max pt > 1023 GeV, since the calo system cannot produce more than that (input pt is 10 bits).

In [None]:
# Keep an event unless its hardest region exceeds 1023 (the 10-bit input limit).
# The mask is written as "not (max > 1023)" so the selection matches the
# original event-by-event comparison exactly.
filter1023_zb = ~(ZeroBias[:,:,:,0].max(axis = (1, 2)) > 1023)
ZeroBias = ZeroBias[filter1023_zb,:,:,:]
print('ZeroBias shape = ' + str(ZeroBias.shape) + '; fraction left = ' + str(round(ZeroBias.shape[0]/len(filter1023_zb),4)))

# Same cut for every MC sample; filter1023_mc[i] is the mask applied to MC[i].
filter1023_mc = []
for i in range(len(MC_files)):
    keep_mc = ~(MC[i][:,:,:,0].max(axis = (1, 2)) > 1023)
    filter1023_mc.append(keep_mc)
    MC[i] = MC[i][keep_mc,:,:,:]
    print('i = ' + str(i) + ': ' + str(MC[i].shape) + '; fraction left = ' + str(round(MC[i].shape[0]/len(filter1023_mc[i]),4)))

The MC samples are clean, so we need to overlay them with ZB to be more realistic before doing any training/testing. It can be achieved by simple region-by-region addition between the two: MC(i,j) = MC(i,j) + ZB(i,j), where the ZB can be chosen at random per MC event.

In [None]:
# Overlay every MC event with one randomly chosen ZeroBias event (region-by-
# region sum). The ZeroBias events are drawn with replacement from a fixed seed.
np.random.seed(0)
MC_zb = []
for i in range(len(MC)):
    ZB_random_event = np.random.randint(low = 0, high = ZeroBias.shape[0], size = MC[i].shape[0])
    # Fancy indexing picks all ZeroBias partners at once, replacing the
    # per-event Python loop. The cast keeps the overlay in float64, which is
    # what the original np.empty(...) buffer produced.
    MC_zb.append((ZeroBias[ZB_random_event] + MC[i]).astype(np.float64))

Take a look at the calo region plots before and after the overlay,

where n = 0 (QCD), 1 (SingleNu_E10), 2 (SingleNu_Pt2To20), 3... (signals)

In [None]:
# Side-by-side heatmaps of events 40-49 of sample n: clean MC (left) and the
# same event after the ZeroBias overlay (right). Both panels share the colour
# scale of the clean MC event.
n = 0
for i in range(40,50):
    # Create only the figure here. plt.subplots() would also add a full-figure
    # Axes that recent Matplotlib versions no longer remove when subplots are
    # drawn on top of it.
    fig = plt.figure(figsize = (10,10))
    print(str(MC_files[n]))
    color_max = MC[n][i,:,:,0].max()
    for position, (title, sample) in enumerate((('MC', MC[n]), ('MC+ZB', MC_zb[n])), start = 1):
        ax = fig.add_subplot(2, 2, position)
        sns.heatmap(sample[i,:,:,0], vmin = 0, vmax = color_max, cmap = "Reds", cbar_kws = {'label': 'Normalized ET'}, ax = ax)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(title)

    plt.show()

Take a look at some ZB statistics.

In [None]:
# Mean ZeroBias deposit per calo region, averaged over all events.
ZB_mean = np.mean(ZeroBias, axis = 0)

# Create the figure without a default Axes (plt.subplots() would leave an
# empty full-figure Axes behind the heatmap on recent Matplotlib versions).
fig = plt.figure(figsize = (10,10))
ax = fig.add_subplot(2, 2, 1)
ax = sns.heatmap(ZB_mean.reshape(18, 14), vmin = 0, vmax = ZB_mean.max(), cmap = "Reds", cbar_kws = {'label': 'ET (GeV)'}, ax = ax)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()

In [None]:
# Log-scale histogram of every region value in the ZeroBias sample, followed
# by the overall mean.
zb_region_values = ZeroBias.reshape(-1)
plt.hist(zb_region_values, bins = 20, log = True)
plt.xlabel("ZeroBias Et")
plt.show()

zb_region_mean = np.mean(zb_region_values)
print('Mean ZeroBias pT = ' + str(zb_region_mean))

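A small fraction of ZB events may contain high-pt regions, since ZB data can include real signal. Should we apply a pt cut to ZB before training? The cell below builds `ZeroBias_ptcut`, which keeps only the events whose hardest region is below 30.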

In [None]:
# Boolean mask selecting ZeroBias events whose hardest region is below 30.0,
# computed in one vectorized pass instead of a Python loop over events.
pt_filter = ZeroBias[:,:,:,0].max(axis = (1, 2)) < 30.0
ZeroBias_ptcut = ZeroBias[pt_filter,:,:,:]

# Hyperparameter searching (no quantization here)

If we want to train with custom loss functions.

In [None]:
import tensorflow.keras.backend as K
def custom_loss_for_train():
    """Return the Keras loss callable used for training.

    The returned function takes (y_true, y_pred) and gives one value per
    event: the squared difference averaged over axes 1, 2 and 3.
    """
    def func(y_true, y_pred):
        # Active choice: MSE(output, input).
        residual = y_pred - y_true
        return K.mean(residual**2, axis = [1, 2, 3])

        # Alternatives that were tried (swap the return above to use one):
        #   MSE(output, mean ZB):      K.mean((y_pred - ZB_mean)**2, axis = [1, 2, 3])
        #   MSE(output, 0), denoising: K.mean(y_pred**2, axis = [1, 2, 3])
    return func

Hypermodel for convolutional autoencoder (usually a teacher model for Knowledge Distillation later).

In [None]:
def hypermodel(hp):
    """Build and compile the convolutional autoencoder for one tuner trial.

    Args:
        hp: keras_tuner hyperparameter container; the filter counts
            'filters_1' .. 'filters_4' are drawn from 15..25 in steps of 2.

    Returns:
        A compiled tf.keras.Sequential model taking (18, 14, 1) inputs and
        using the loss from custom_loss_for_train().
    """
    def tuned_conv(hp_name):
        # 3x3 same-padded ReLU convolution whose filter count is tuned.
        n_filters = hp.Int(hp_name, min_value = 15, max_value = 25, step = 2)
        return layers.Conv2D(filters = n_filters,
                             kernel_size = (3, 3),
                             activation = 'relu',
                             strides = 1,
                             padding = 'same')

    def single_filter_conv():
        # 3x3 same-padded ReLU convolution with one output channel.
        return layers.Conv2D(filters = 1, kernel_size = (3, 3), activation = 'relu', strides = 1, padding = 'same')

    hp_model = tf.keras.Sequential()
    hp_model.add(tf.keras.layers.InputLayer(input_shape = (18, 14, 1)))

    # Encoder side: convolution, 2x2 average pooling, convolution, one-channel bottleneck.
    hp_model.add(tuned_conv('filters_1'))
    hp_model.add(layers.AveragePooling2D((2, 2)))
    hp_model.add(tuned_conv('filters_2'))
    hp_model.add(single_filter_conv())

    # Decoder side: convolution, 2x2 upsampling, convolution, one-channel output.
    hp_model.add(tuned_conv('filters_3'))
    hp_model.add(layers.UpSampling2D((2, 2)))
    hp_model.add(tuned_conv('filters_4'))
    hp_model.add(single_filter_conv())

    hp_model.compile(optimizer = 'adam', loss = custom_loss_for_train())
    return hp_model

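Hypermodel for a shallow dense network (usually a student model for Knowledge Distillation later). Note that this cell redefines `hypermodel`, replacing the convolutional version above; run only the one you intend to tune.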

In [None]:
def hypermodel(hp):
    """Build and compile the shallow dense model for one tuner trial.

    Args:
        hp: keras_tuner hyperparameter container; 'units_1' (hidden width) is
            drawn from 10..40 in steps of 2.

    Returns:
        A compiled tf.keras.Sequential model that flattens the (18, 14, 1)
        input and ends in a single ReLU output unit, trained with 'mse'.
    """
    hidden_units = hp.Int('units_1', min_value = 10, max_value = 40, step = 2)
    layer_stack = [
        tf.keras.layers.InputLayer(input_shape = (18, 14, 1)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units = hidden_units, activation = 'relu'),
        tf.keras.layers.Dropout(rate = 0.3),
        # The model output is one scalar per event.
        tf.keras.layers.Dense(1, activation = 'relu'),
    ]

    hp_model = tf.keras.Sequential()
    for layer in layer_stack:
        hp_model.add(layer)
    hp_model.compile(optimizer = 'adam', loss = 'mse')
    return hp_model

Set configuration for tuner (Hyperband).

In [None]:
# Hyperband tuner over whichever `hypermodel` function is currently defined,
# ranking trials by validation loss. overwrite = True discards any previous
# results stored under hypertuning/tune.
tuner = Hyperband(hypermodel,
                 objective = 'val_loss',
                 max_epochs = 20,
                 factor = 3, #number of models to train in a bracket = 1+log_factor(max_epochs)
                 hyperband_iterations = 2, #number of times to iterate over the full Hyperband algorithm
                 seed = 10,
                 directory = 'hypertuning',
                 project_name = 'tune',
                 overwrite = True)

Partition the dataset into train/val/test sets.

In [None]:
# The full ZeroBias sample is used here (not ZeroBias_ptcut).
X = ZeroBias

train_ratio = 0.7
val_ratio = 0.1
test_ratio = 1 - train_ratio - val_ratio
# First split off the test set, then divide the remainder into train and
# validation; the second test_size is rescaled so that the validation set is
# val_ratio of the full sample. Both splits use a fixed random_state.
X_train_val, X_test = train_test_split(X, test_size = test_ratio, random_state = 123)
X_train, X_val = train_test_split(X_train_val, test_size = val_ratio/(val_ratio + train_ratio), random_state = 123)

Run the search. Make sure the target (the second argument) matches what the model is trained for: the input itself for reconstruction, or the appropriate label otherwise.

In [None]:
# Reconstruction-style search: the inputs are also passed as the targets, for
# both the training and the validation data.
# NOTE(review): this target only fits a model whose output has the input shape;
# confirm which `hypermodel` is active before running.
tuner.search(X_train, X_train,
            epochs = 20,
            validation_data = (X_val, X_val),
            batch_size = 256)

Show the best models.

In [None]:
# Print a summary of the three best trials.
tuner.results_summary(num_trials = 3)

Take one of them for later use.

In [None]:
# Take the best hyperparameter set and build a fresh model from it via the
# tuner's hypermodel, then print its layer summary.
best_hp = tuner.get_best_hyperparameters()[0]
model = tuner.hypermodel.build(best_hp)
model.summary()

# Conv AE

Convolutional autoencoder to be trained for input reconstruction (to be used as a teacher model for Knowledge Distillation later).

The encoder part, transforming the (18, 14) region input into a smaller latent space.

In [None]:
encoder_input = tf.keras.Input(shape = (18, 14, 1))

# Earlier variant, kept for reference:
#encoding = layers.Conv2D(21, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoder_input)
#encoding = layers.AveragePooling2D((2, 2))(encoding)
#encoding = layers.Conv2D(19, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoding)

# Convolutional trunk, applied in order: conv(20) -> relu -> 2x2 average
# pooling -> conv(40) -> relu -> flatten.
encoding = encoder_input
for encoder_layer in (
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.AveragePooling2D((2, 2)),
    layers.Conv2D(40, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.Flatten(),
):
    encoding = encoder_layer(encoding)

# Latent representation: 100 ReLU units.
encoder_output = layers.Dense(100, activation = 'relu')(encoding)
#encoder_output = layers.Conv2D(1, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoding)

encoder = tf.keras.models.Model(encoder_input, encoder_output)
encoder.summary()

The decoder part, reconstructing the (18, 14) region input from the latent space. Note that Conv2DTranspose is not yet supported in hls4ml, but it is fine to use if the model is going to be distilled into another network.

In [None]:
# Earlier variant, kept for reference:
#decoding = layers.Conv2D(25, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoder_output)
#decoding = layers.UpSampling2D((2, 2))(decoding)
#decoding = layers.Conv2D(25, (3, 3), activation = 'relu', strides = 1, padding = 'same')(decoding)
#decoding = layers.Conv2D(25, (3, 3), strides = 1, padding = 'same')(encoder_output)

# Decoder trunk, applied in order to the encoder output: dense layer reshaped
# to a (9, 7, 20) feature map -> relu -> conv(40) -> relu -> 2x2 upsampling
# -> conv(20) -> relu.
decoding = encoder_output
for decoder_layer in (
    layers.Dense(9 * 7 * 20),
    layers.Reshape((9, 7, 20)),
    layers.Activation('relu'),
    layers.Conv2D(40, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.UpSampling2D((2, 2)),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
):
    decoding = decoder_layer(decoding)

# Single-channel ReLU output.
decoder_output = layers.Conv2D(1, (3, 3), activation = 'relu', strides = 1, padding = 'same')(decoding)

In [None]:
# Full autoencoder: maps the encoder input through to the decoder output.
# This rebinds `model`.
model = tf.keras.Model(encoder_input, decoder_output)
model.summary()

In [None]:
# Compile the autoencoder with the Adam optimizer and a mean-squared-error loss.
model.compile(optimizer = 'adam', loss = 'mse')

# Conv VAE

In [None]:
class Sampling(layers.Layer):
    """Draw a latent vector z from the pair (z_mean, z_log_var).

    Computes z = z_mean + exp(0.5 * z_log_var) * epsilon, where epsilon is
    drawn from tf.keras.backend.random_normal with shape (batch, dim).
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        batch, dim = mean_shape[0], mean_shape[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        sigma = tf.exp(0.5 * z_log_var)
        return z_mean + sigma * epsilon

In [None]:
# Short aliases for the TensorFlow Probability distribution and bijector modules.
tfd = tfp.distributions
tfb = tfp.bijectors

In [None]:
class Flow(layers.Layer):
    """Unfinished placeholder for a flow layer acting on a latent vector z.

    NOTE(review): `call` constructs an autoregressive transform but never
    applies it to `z` and has no return statement, so the layer currently
    outputs None. Decide how the transform should act on `z` before using it.
    """

    def call(self, inputs):
        z = inputs
        # `batch` and `dim` are computed but not used below.
        batch = tf.shape(z)[0]
        dim = tf.shape(z)[1]
        # A new transform object is built on every call and then discarded.
        tfp.layers.AutoregressiveTransform(tfb.AutoregressiveNetwork(params=10, hidden_units=[10], activation='relu'))
        #return 2*z
        #return 2*z

In [None]:
latent_dim = 60
encoder_inputs = tf.keras.Input(shape = (28, 28, 1))

# Convolutional trunk for 28x28 single-channel images, applied in order:
# conv(20) -> relu -> 2x2 average pooling -> conv(20) -> relu -> flatten.
encoding = encoder_inputs
for encoder_layer in (
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.AveragePooling2D((2, 2)),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.Flatten(),
):
    encoding = encoder_layer(encoding)

# Two linear heads give the mean and log-variance of the latent Gaussian;
# Sampling draws z from them.
z_mean = layers.Dense(latent_dim, name = 'z_mean')(encoding)
z_log_var = layers.Dense(latent_dim, name = 'z_log_var')(encoding)
z = Sampling()([z_mean, z_log_var])

# The encoder returns all three tensors: [z_mean, z_log_var, z].
encoder = tf.keras.Model(encoder_inputs, [z_mean, z_log_var, z], name = 'encoder')
encoder.summary()

In [None]:
latent_inputs = tf.keras.Input(shape = (latent_dim,))

# Decoder trunk, applied in order: dense layer reshaped to a (14, 14, 20)
# feature map -> relu -> conv(20) -> relu -> 2x2 upsampling -> conv(20) -> relu.
decoding = latent_inputs
for decoder_layer in (
    layers.Dense(14 * 14 * 20),
    layers.Reshape((14, 14, 20)),
    layers.Activation('relu'),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.UpSampling2D((2, 2)),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
):
    decoding = decoder_layer(decoding)

# Sigmoid output: one channel with values in (0, 1).
decoder_outputs = layers.Conv2D(1, (3, 3), activation = 'sigmoid', strides = 1, padding = 'same')(decoding)

decoder = tf.keras.Model(latent_inputs, decoder_outputs, name = 'decoder')
decoder.summary()

In [None]:
class VAE(keras.Model):
    """Variational autoencoder wrapping an encoder and a decoder model.

    The encoder must return (z_mean, z_log_var, z); the decoder maps z back to
    the input space. Training minimises
    reco_weight * BCE(input, reconstruction) + KL, both averaged over the batch.

    Args:
        encoder: model returning [z_mean, z_log_var, z].
        decoder: model mapping z to a reconstruction.
        reco_weight: factor applied to the reconstruction term (default 1.0,
            i.e. the plain sum of the two terms).
        **kwargs: forwarded to keras.Model.
    """

    def __init__(self, encoder, decoder, reco_weight = 1.0, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.reco_weight = reco_weight
        # Running means reported in the training logs.
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras as this model's metrics."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on a batch and return the running losses."""
        # fit() may hand over a tuple such as (x,) or (x, y); only the inputs
        # are needed because the model reconstructs its own input.
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # binary_crossentropy averages over the channel axis; sum the
            # remaining two spatial axes, then average over the batch.
            reconstruction_loss = self.reco_weight * tf.reduce_mean(tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis = (1, 2)))
            # KL divergence to a unit Gaussian, summed over latent dimensions.
            kl_loss = tf.reduce_mean(tf.reduce_sum(-0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)), axis = 1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [None]:
# Assemble the VAE from the encoder/decoder defined above. No loss is passed
# to compile(): VAE.train_step computes the loss itself.
vae = VAE(encoder, decoder)
vae.compile(optimizer = 'adam')

In [None]:
# MNIST: merge the train and test images, add a trailing channel axis and
# scale the pixel values by 1/255.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
x_train, x_test = mnist_train[0], mnist_test[0]
mnist_digits = np.concatenate((x_train, x_test), axis=0)
mnist_digits = mnist_digits[..., np.newaxis].astype("float32") / 255

In [None]:
# Fashion-MNIST alternative: same preprocessing, stored under the same name
# `mnist_digits`, so whichever dataset cell runs last is the one used for training.
fashion_train, fashion_test = keras.datasets.fashion_mnist.load_data()
x_train, x_test = fashion_train[0], fashion_test[0]
mnist_digits = np.concatenate((x_train, x_test), axis=0)
mnist_digits = mnist_digits[..., np.newaxis].astype("float32") / 255

In [None]:
# Train on the images only (no targets): VAE.train_step derives the loss from
# the inputs. No validation data is used.
history = vae.fit(mnist_digits, epochs = 30,
                    #validation_data = X_val,
                  batch_size = 512)

In [None]:
# Training curves: total and reconstruction loss on the left axis (green),
# KL loss on a twin right axis (red).
plt.figure(figsize = (15,10))
axes_left = plt.subplot(2, 2, 1)
for history_key, curve_label, curve_style in (
    ('loss', 'Total loss = Reco + KL', 'solid'),
    ('reconstruction_loss', 'Reconstruction loss (BCE)', 'dashed'),
):
    axes_left.plot(history.history[history_key], label = curve_label, c = 'g', linestyle = curve_style)
axes_left.legend(loc = "upper left")
axes_left.set_xlabel('Epoch')
axes_left.set_ylabel('Total loss', c = 'g')

axes_right = axes_left.twinx()
axes_right.plot(history.history['kl_loss'], label = 'KL loss', c = 'r', linestyle = 'dashed')
axes_right.legend(loc = "upper right")
axes_right.set_ylabel('KL loss', c = 'r')

In [None]:
def plot_label_clusters(vae, data, labels):
    """Scatter the first two latent-mean components of `data`, coloured by `labels`.

    Args:
        vae: model exposing an `encoder` whose predict() returns three outputs,
            the first being the latent means.
        data: inputs passed to the encoder.
        labels: per-sample values used as the scatter colour.
    """
    latent_means, _log_var, _sample = vae.encoder.predict(data)
    first_component = latent_means[:, 0]
    second_component = latent_means[:, 1]

    plt.figure(figsize=(12, 10))
    plt.scatter(first_component, second_component, c=labels)
    #plt.scatter(tf.exp(z_mean[:, 0]/2), tf.exp(z_mean[:, 1]/2), c=labels)
    plt.colorbar()
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.show()


# Plot the latent clusters of the labelled Fashion-MNIST training images
# (swap in the commented line to use MNIST instead).
#(x_train, y_train), _ = keras.datasets.mnist.load_data()
x_train, y_train = keras.datasets.fashion_mnist.load_data()[0]
x_train = x_train[..., np.newaxis].astype("float32") / 255

plot_label_clusters(vae, x_train, y_train)

In [None]:
# Encode every image, keep the sampled latent vector (third encoder output)
# and decode it back to image space.
mnist_latent_samples = vae.encoder.predict(mnist_digits)[2]
mnist_digits_predict = vae.decoder.predict(mnist_latent_samples)

In [None]:
# Input image (left) next to its VAE reconstruction (right) for images 50-79.
for i in range(50,80):
    # Create only the figure here. plt.subplots() would also add a full-figure
    # Axes that recent Matplotlib versions no longer remove when subplots are
    # drawn on top of it.
    fig = plt.figure(figsize = (5,5))
    for position, (title, images) in enumerate((('Input', mnist_digits), ('Output', mnist_digits_predict)), start = 1):
        ax = fig.add_subplot(2, 2, position)
        sns.heatmap(images[i,:,:,0].reshape(28, 28), vmin = 0, vmax = 1, cmap = "Reds", ax = ax)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(title)

    plt.show()

In [None]:
# NOTE(review): the line below is not valid Python, so running this cell raises
# a SyntaxError. Presumably a deliberate barrier separating the MNIST test from
# the calo-region VAE that follows -- confirm before removing.
//////////@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

In [None]:
class Sampling(layers.Layer):
    """Draw a latent vector z from the pair (z_mean, z_log_var).

    Computes z = z_mean + exp(0.5 * z_log_var) * epsilon, where epsilon is
    drawn from tf.keras.backend.random_normal with shape (batch, dim).
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        batch, dim = mean_shape[0], mean_shape[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        sigma = tf.exp(0.5 * z_log_var)
        return z_mean + sigma * epsilon

In [None]:
latent_dim = 60
encoder_inputs = tf.keras.Input(shape = (18, 14, 1))

# Convolutional trunk for the (18, 14) calo-region image, applied in order:
# conv(20) -> relu -> 2x2 average pooling -> conv(20) -> relu -> flatten.
encoding = encoder_inputs
for encoder_layer in (
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.AveragePooling2D((2, 2)),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.Flatten(),
):
    encoding = encoder_layer(encoding)

# Two linear heads give the mean and log-variance of the latent Gaussian;
# Sampling draws z from them.
z_mean = layers.Dense(latent_dim, name = 'z_mean')(encoding)
z_log_var = layers.Dense(latent_dim, name = 'z_log_var')(encoding)
z = Sampling()([z_mean, z_log_var])

# The encoder returns all three tensors: [z_mean, z_log_var, z].
encoder = tf.keras.Model(encoder_inputs, [z_mean, z_log_var, z], name = 'encoder')
encoder.summary()

In [None]:
latent_inputs = tf.keras.Input(shape = (latent_dim,))

# Decoder trunk, applied in order: dense layer reshaped to a (9, 7, 20)
# feature map -> relu -> conv(20) -> relu -> 2x2 upsampling -> conv(20) -> relu.
decoding = latent_inputs
for decoder_layer in (
    layers.Dense(9 * 7 * 20),
    layers.Reshape((9, 7, 20)),
    layers.Activation('relu'),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
    layers.UpSampling2D((2, 2)),
    layers.Conv2D(20, (3, 3), strides = 1, padding = 'same'),
    layers.Activation('relu'),
):
    decoding = decoder_layer(decoding)

# Sigmoid output: one channel with values in (0, 1).
decoder_outputs = layers.Conv2D(1, (3, 3), activation = 'sigmoid', strides = 1, padding = 'same')(decoding)

decoder = tf.keras.Model(latent_inputs, decoder_outputs, name = 'decoder')
decoder.summary()

In [None]:
class VAE(keras.Model):
    """Variational autoencoder wrapping an encoder and a decoder model.

    The encoder must return (z_mean, z_log_var, z); the decoder maps z back to
    the input space. Training minimises
    reco_weight * BCE(input, reconstruction) + KL, both averaged over the batch.

    Args:
        encoder: model returning [z_mean, z_log_var, z].
        decoder: model mapping z to a reconstruction.
        reco_weight: factor applied to the reconstruction term. Defaults to
            10.0, the value previously hard-coded in train_step.
        **kwargs: forwarded to keras.Model.
    """

    def __init__(self, encoder, decoder, reco_weight = 10.0, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.reco_weight = reco_weight
        # Running means reported in the training logs.
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras as this model's metrics."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on a batch and return the running losses."""
        # fit() may hand over a tuple such as (x,) or (x, y); only the inputs
        # are needed because the model reconstructs its own input.
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # binary_crossentropy averages over the channel axis; sum the
            # remaining two spatial axes, then average over the batch.
            reconstruction_loss = self.reco_weight * tf.reduce_mean(tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis = (1, 2)))
            # KL divergence to a unit Gaussian, summed over latent dimensions.
            kl_loss = tf.reduce_mean(tf.reduce_sum(-0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)), axis = 1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [None]:
# Assemble the calo-region VAE from the (18, 14) encoder/decoder defined above.
# No loss is passed to compile(): VAE.train_step computes the loss itself.
vae = VAE(encoder, decoder)
vae.compile(optimizer = 'adam')

In [None]:
# Inputs are divided by et_scale before training; the same factor is reused at
# prediction time.
# NOTE(review): the decoder ends in a sigmoid and the loss is binary
# cross-entropy, so targets are presumably expected in [0, 1]; regions above
# et_scale give targets > 1 -- confirm this is intended.
et_scale = 80
history = vae.fit(X_train/et_scale, epochs = 25,
                  #validation_data = (X_val/et_scale,X_val/et_scale),
                  batch_size = 512)

In [None]:
# Training curves: total and reconstruction loss on the left axis (green),
# KL loss on a twin right axis (red).
plt.figure(figsize = (15,10))
axes_left = plt.subplot(2, 2, 1)
for history_key, curve_label, curve_style in (
    ('loss', 'Total loss = Reco + KL', 'solid'),
    ('reconstruction_loss', 'Reconstruction loss (BCE)', 'dashed'),
):
    axes_left.plot(history.history[history_key], label = curve_label, c = 'g', linestyle = curve_style)
axes_left.legend(loc = "upper left")
axes_left.set_xlabel('Epoch')
axes_left.set_ylabel('Total loss', c = 'g')

axes_right = axes_left.twinx()
axes_right.plot(history.history['kl_loss'], label = 'KL loss', c = 'r', linestyle = 'dashed')
axes_right.legend(loc = "upper right")
axes_right.set_ylabel('KL loss', c = 'r')

In [None]:
# Encode and decode the ZeroBias test set (scaled like the training data).
X_test_scaled = X_test/et_scale
X_test_mu, X_test_logvar, X_test_z = vae.encoder.predict(X_test_scaled)
X_test_predict = vae.decoder.predict(X_test_z)

# Same for every overlaid MC sample; each list holds one array per sample.
MC_zb_mu, MC_zb_logvar, MC_zb_z, MC_zb_predict = [], [], [], []
for overlaid_sample in MC_zb:
    mu, logvar, z = vae.encoder.predict(overlaid_sample/et_scale)
    # The reconstruction is decoded from the sampled z, not from the mean.
    predict = vae.decoder.predict(z)
    MC_zb_mu.append(mu)
    MC_zb_logvar.append(logvar)
    MC_zb_z.append(z)
    MC_zb_predict.append(predict)

In [None]:
# Compare two latent components (i, j) between signal sample n and the
# ZeroBias test set, for the latent mean, the latent sigma and the sampled z.
i = 8
j = 9
n = 5

def _latent_pair_frame(signal, zerobias, label):
    """Return a long-format frame with columns label[i], label[j] and 'dataset'.

    Rows 1..8999 of each input are used (same slice as before).
    NOTE(review): the slice starts at 1 and so skips the first event -- confirm
    this is intended.
    """
    columns = ["{}[{}]".format(label, i), "{}[{}]".format(label, j)]
    frames = []
    for values, dataset in ((signal, MC_files[n]), (zerobias, "test (ZB)")):
        frame = pd.DataFrame(values[1:9000][:, [i, j]], columns = columns)
        frame["dataset"] = dataset
        frames.append(frame)
    # ignore_index gives a unique index, which seaborn needs to align x, y and hue.
    return pd.concat(frames, ignore_index = True)

df_z_mu = _latent_pair_frame(MC_zb_mu[n], X_test_mu, "z_mu")
# sigma = exp(logvar / 2)
df_z_sigma = _latent_pair_frame(np.exp(MC_zb_logvar[n]/2), np.exp(X_test_logvar/2), "z_sigma")
# The sampled z is plotted as-is: it is already a latent coordinate, not a
# log-variance, so the exp(. / 2) used for sigma does not apply to it.
df_z = _latent_pair_frame(MC_zb_z[n], X_test_z, "z")

# jointplot creates its own figure, so no plt.figure() call is needed (it
# would only produce an extra blank figure).
for frame in (df_z_mu, df_z_sigma, df_z):
    x_col, y_col = frame.columns[:2]
    sns.jointplot(data = frame, x = x_col, y = y_col, hue = "dataset", height = 8, ratio = 5,
                  #xlim = (-8, 8), ylim = (-8, 8),
                  marker = '.', alpha = 1)
    plt.show()

In [None]:
def custom_loss_for_pred(y_true, y_pred, choice):
    """Return one anomaly score per event for the selected loss.

    Parameters
    ----------
    y_true, y_pred : array-like
        Meaning depends on ``choice`` (see below). The first axis indexes events.
    choice : int
        0 -- MSE between ``y_true`` and ``y_pred``, averaged over axes 1-3.
        1 -- mean of ``y_pred**2`` over axes 1-3 (de-noising model; ``y_true``
             is ignored).
        2 -- "radius": Euclidean norm of ``y_pred`` along axis 1 (``y_true`` is
             ignored).
        3 -- KL term ``-0.5 * sum(1 + y_pred - y_true**2 - exp(y_pred))`` along
             axis 1, i.e. ``y_true`` is the latent mean and ``y_pred`` the
             latent log-variance.
        4 -- Keras binary cross-entropy, summed over axes 1 and 2 of the Keras
             result.

    Returns
    -------
    Array with one score per event.

    Raises
    ------
    ValueError
        If ``choice`` is not one of 0-4. Previously the function fell through
        and returned ``None``, which only surfaced later as an unrelated error.
    """
    #MSE
    if choice == 0:
        return np.mean((y_true - y_pred)**2, axis = (1, 2, 3))

    #MSE for de-noising model
    if choice == 1:
        return np.mean(y_pred**2, axis = (1, 2, 3))

    #VAE radius loss
    if choice == 2:
        return np.sqrt(np.sum(y_pred**2, axis = 1))

    #VAE KL loss
    if choice == 3:
        return -0.5 * np.sum(1.0 + y_pred - y_true**2 - np.exp(y_pred), axis = 1)

    #BCE loss
    if choice == 4:
        return np.sum(keras.losses.binary_crossentropy(y_true, y_pred), axis = (1, 2))

    raise ValueError("custom_loss_for_pred: unknown choice {!r} (expected 0-4)".format(choice))

In [None]:
#For VAE
# Per-event anomaly scores for the ZeroBias test set and for every MC sample.
# Reconstruction scores (MSE, BCE) compare the input divided by et_scale with
# the model output; radius uses the latent mean; KL uses mean and log-variance.
X_test_vaeloss_mse = custom_loss_for_pred(X_test/et_scale, X_test_predict, 0)
MC_zb_vaeloss_mse = [custom_loss_for_pred(sample/et_scale, MC_zb_predict[k], 0)
                     for k, sample in enumerate(MC_zb)]

X_test_vaeloss_bce = custom_loss_for_pred(X_test/et_scale, X_test_predict, 4)
MC_zb_vaeloss_bce = [custom_loss_for_pred(sample/et_scale, MC_zb_predict[k], 4)
                     for k, sample in enumerate(MC_zb)]

X_test_vaeloss_radius = custom_loss_for_pred(X_test, X_test_mu, 2)
MC_zb_vaeloss_radius = [custom_loss_for_pred(sample, MC_zb_mu[k], 2)
                        for k, sample in enumerate(MC_zb)]

X_test_vaeloss_kl = custom_loss_for_pred(X_test_mu, X_test_logvar, 3)
MC_zb_vaeloss_kl = [custom_loss_for_pred(MC_zb_mu[k], MC_zb_logvar[k], 3)
                    for k, _ in enumerate(MC_zb)]

# Weights of the individual terms in the combined score.
weight_mse = 0
weight_bce = 0.5
weight_radius = 0
weight_kl = 0.5

X_test_vaeloss = weight_mse*X_test_vaeloss_mse + weight_bce*X_test_vaeloss_bce + weight_radius*X_test_vaeloss_radius + weight_kl*X_test_vaeloss_kl
MC_zb_vaeloss = [weight_mse*mse + weight_bce*bce + weight_radius*radius + weight_kl*kl
                 for mse, bce, radius, kl in zip(MC_zb_vaeloss_mse, MC_zb_vaeloss_bce,
                                                 MC_zb_vaeloss_radius, MC_zb_vaeloss_kl)]

In [None]:
#Original vs Reconstructed
# Shows, for ten events, the scaled input, the VAE output and their absolute
# difference side by side, and prints the combined anomaly score of each event.
#show_ZB = True
show_ZB = False
n = 5

# Pick the sample once instead of branching separately for every panel.
if show_ZB:
    sample_label, originals, reconstructions, losses = 'ZB test', X_test, X_test_predict, X_test_vaeloss
else:
    sample_label, originals, reconstructions, losses = str(MC_files[n]), MC_zb[n], MC_zb_predict[n], MC_zb_vaeloss[n]

for i in range(580,590):
    # plt.figure (not plt.subplots): plt.subplots also creates one full-size
    # axes, which recent matplotlib keeps and draws behind the 3x3 panels.
    fig = plt.figure(figsize = (17,17))
    print(sample_label + '\nloss = ' + str(losses[i]))

    original = originals[i,:,:,0].reshape(18, 14)/et_scale
    reconstructed = reconstructions[i,:,:,0].reshape(18, 14)
    # All three panels share the colour scale of the original event.
    vmax = originals[i,:,:,0].max()/et_scale

    panels = (('Original', original),
              ('Reconstructed', reconstructed),
              ('abs(original-reconstructed)', np.absolute(reconstructed - original)))
    for position, (title, image) in enumerate(panels, start = 1):
        ax = plt.subplot(3, 3, position)
        ax = sns.heatmap(image, vmin = 0, vmax = vmax, cmap = "Blues", cbar_kws = {'label': 'Scaled ET'})
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(title)
    plt.show()

In [None]:
# One normalised, log-scale histogram figure per VAE score: ZeroBias test,
# MC sample 0 (labelled QCD) and MC samples 3..6. Each row below is
# (ZeroBias scores, per-sample MC scores, upper histogram edge, x-axis label).
nbins = 20
rmin = 0
for zb_scores, mc_scores, rmax, score_label in (
        (X_test_vaeloss_mse, MC_zb_vaeloss_mse, 0.05, "Reconstruction MSE loss"),
        (X_test_vaeloss_bce, MC_zb_vaeloss_bce, 140, "Reconstruction BCE loss"),
        (X_test_vaeloss_radius, MC_zb_vaeloss_radius, 20, "Radius loss"),
        (X_test_vaeloss_kl, MC_zb_vaeloss_kl, 20, "KL loss")):
    hist_range = (rmin, rmax)
    plt.hist(zb_scores, density = 1, bins = nbins, alpha = 0.3, label = 'test (ZeroBias)', range = hist_range, log = True)
    plt.hist(mc_scores[0], density = 1, bins = nbins, label = 'QCD', alpha = 0.1, histtype = 'stepfilled', range = hist_range)
    for i in range(3,7):
        plt.hist(mc_scores[i], density = 1, bins = nbins, label = MC_files[i], histtype = 'step', range = hist_range)
    # Legend is placed outside the axes, to the right.
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.xlabel(score_label)
    plt.show()

In [None]:
# Pairwise correlations between the individual VAE scores for MC sample n
# versus the ZeroBias test set.
n = 5

df_vaeloss_sig = pd.DataFrame(MC_zb_vaeloss_mse[n][1:9000], columns = ["MSE loss"])
df_vaeloss_sig["BCE loss"] = MC_zb_vaeloss_bce[n][1:9000]
df_vaeloss_sig["Radius loss"] = MC_zb_vaeloss_radius[n][1:9000]
df_vaeloss_sig["KL loss"] = MC_zb_vaeloss_kl[n][1:9000]
df_vaeloss_sig["dataset"] = MC_files[n]
df_vaeloss_zb = pd.DataFrame(X_test_vaeloss_mse[1:9000], columns = ["MSE loss"])
df_vaeloss_zb["BCE loss"] = X_test_vaeloss_bce[1:9000]
df_vaeloss_zb["Radius loss"] = X_test_vaeloss_radius[1:9000]
df_vaeloss_zb["KL loss"] = X_test_vaeloss_kl[1:9000]
df_vaeloss_zb["dataset"] = "test (ZB)"

# ignore_index gives every row a unique label; without it both halves carry
# the same 0..N-1 labels.
df_vaeloss = pd.concat([df_vaeloss_sig,df_vaeloss_zb], ignore_index = True)

# sns.jointplot is figure-level and opens its own figure (sized by `height`),
# so no plt.figure() is issued -- it only produced empty extra figures.
# Each row: (x column, y column, x limits, y limits).
for x_name, y_name, x_range, y_range in (
        ("MSE loss", "BCE loss", (0.0, 0.05), (0, 100)),
        ("Radius loss", "KL loss", (0, 10), (0, 50)),
        ("MSE loss", "KL loss", (0, 0.05), (0, 50)),
        ("BCE loss", "KL loss", (0, 100), (0, 40))):
    sns.jointplot(data = df_vaeloss, x = x_name, y = y_name, hue = "dataset",
                  height = 8, ratio = 5,
                  xlim = x_range, ylim = y_range,
                  marker = '.', alpha = 1)
    plt.show()

In [None]:
# Baseline score: MSE between each event and the mean ZeroBias event.
ZeroBias_mean = np.mean(ZeroBias, axis = 0)

baseline_zb = np.mean((X_test - ZeroBias_mean)**2, axis = (1, 2))
baseline_mc = [np.mean((sample - ZeroBias_mean)**2, axis = (1, 2)) for sample in MC_zb]

# Labels: ZeroBias test events are 0, events of every MC sample are 1.
# The lists are built from MC_zb itself; the loops previously ran over
# len(MC) while indexing MC_zb, which breaks if the two lists differ in length.
Y_zb = np.zeros((X_test.shape[0], 1))
Y_mc = [np.ones((sample.shape[0], 1)) for sample in MC_zb]

# For each MC sample: that sample's values followed by the ZeroBias test values.
Y_true = [np.concatenate((labels, Y_zb)) for labels in Y_mc]
Y_baseline = [np.concatenate((scores, baseline_zb)) for scores in baseline_mc]
Y_mse = [np.concatenate((scores, X_test_vaeloss_mse)) for scores in MC_zb_vaeloss_mse]
Y_bce = [np.concatenate((scores, X_test_vaeloss_bce)) for scores in MC_zb_vaeloss_bce]
Y_radius = [np.concatenate((scores, X_test_vaeloss_radius)) for scores in MC_zb_vaeloss_radius]
Y_kl = [np.concatenate((scores, X_test_vaeloss_kl)) for scores in MC_zb_vaeloss_kl]
Y_total = [np.concatenate((scores, X_test_vaeloss)) for scores in MC_zb_vaeloss]

In [None]:
# ROC curves (MC sample n vs. ZeroBias test) for the baseline and for every
# VAE-derived anomaly score.
n = 5

plt.figure(figsize = (13, 13))
axes = plt.subplot(2, 2, 1)

# roc_curve/auc return their own results, so nothing is pre-allocated here:
# the former np.empty(...) buffers were overwritten before ever being read.
fpr_baseline, tpr_baseline, thresholds_baseline = roc_curve(Y_true[n], Y_baseline[n])
roc_auc_baseline = auc(fpr_baseline, tpr_baseline)

fpr_mse, tpr_mse, thresholds_mse = roc_curve(Y_true[n], Y_mse[n])
roc_auc_mse = auc(fpr_mse, tpr_mse)

fpr_bce, tpr_bce, thresholds_bce = roc_curve(Y_true[n], Y_bce[n])
roc_auc_bce = auc(fpr_bce, tpr_bce)

fpr_radius, tpr_radius, thresholds_radius = roc_curve(Y_true[n], Y_radius[n])
roc_auc_radius = auc(fpr_radius, tpr_radius)

fpr_kl, tpr_kl, thresholds_kl = roc_curve(Y_true[n], Y_kl[n])
roc_auc_kl = auc(fpr_kl, tpr_kl)

fpr_total, tpr_total, thresholds_total = roc_curve(Y_true[n], Y_total[n])
roc_auc_total = auc(fpr_total, tpr_total)

lw = 2

axes.plot(fpr_baseline, tpr_baseline, linestyle = '--', lw = lw, color = 'red', label = 'Cut-flow baseline (AUC = %.4f)' % (roc_auc_baseline))
axes.plot(fpr_mse, tpr_mse, linestyle = '-', lw = lw, label = 'VAE anomaly score = MSE loss (AUC = %.4f)' % (roc_auc_mse))
axes.plot(fpr_bce, tpr_bce, linestyle = '-', lw = lw, label = 'VAE anomaly score = BCE loss (AUC = %.4f)' % (roc_auc_bce))
axes.plot(fpr_radius, tpr_radius, linestyle = '-', lw = lw, label = 'VAE anomaly score = radius loss (AUC = %.4f)' % (roc_auc_radius))
axes.plot(fpr_kl, tpr_kl, linestyle = '-', lw = lw, label = 'VAE anomaly score = KL loss (AUC = %.4f)' % (roc_auc_kl))
axes.plot(fpr_total, tpr_total, linestyle = '--', lw = lw, label = 'VAE anomaly score = BCE + KL (AUC = %.4f)' % (roc_auc_total))

# Vertical marker at the working point FPR = 0.2%.
axes.plot([0.002, 0.002], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'FPR = 0.2% ~ (100 kHz)/(ZB rate)')
axes.set_xlim([0.00001, 1.0])
axes.set_ylim([0, 1.0])
axes.set_xscale(value = "log")
axes.set_xlabel('False Positive Rate (FPR)')
axes.set_ylabel('True Positive Rate (TPR)')
axes.set_title(MC_files[n] + ' vs ZB')
axes.legend(loc='center left', bbox_to_anchor = (0.6, 0.5))
plt.show()

# Dense model

Fully connected dense model to be trained for input reconstruction (less useful since region correlation is lost to some degree in the Flatten layer).

In [None]:
# Fully connected autoencoder: the 18x14x1 input is flattened, squeezed through
# a 10-unit bottleneck and expanded back to 252 (= 18*14) sigmoid outputs that
# are reshaped to the input shape.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape = (18, 14, 1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(20, activation = 'relu'),
    tf.keras.layers.Dense(20, activation = 'relu'),
    tf.keras.layers.Dense(10, activation = 'relu'),
    tf.keras.layers.Dense(20 , activation = 'relu'),
    tf.keras.layers.Dense(20 , activation = 'relu'),
    tf.keras.layers.Dense(252 , activation = 'sigmoid'),
    tf.keras.layers.Reshape((18, 14, 1)),
])
model.summary()
model.compile(optimizer = 'adam', loss = 'mse')

# De-noising model (De-ZeroBias model)

Convolutional autoencoder to be trained for ZB pattern removal. Experimental and for fun only.

In [None]:
# Encoder half of the de-noising model: two 3x3 convolutions with 20 filters
# each, stride 1 and 'same' padding, applied to the 18x14x1 input.
encoder_input = tf.keras.Input(shape = (18, 14, 1))
encoding = layers.Conv2D(20, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoder_input)
# Optional pooling stage (disabled).
#encoding = layers.AveragePooling2D((2, 2))(encoding)
encoder_output = layers.Conv2D(20, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoding)

# Alternative 2-filter encoder output (disabled).
#encoder_output = layers.Conv2D(2, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoding)

# Stand-alone encoder model, built here so its summary can be printed.
encoder = tf.keras.models.Model(encoder_input, encoder_output)
encoder.summary()

In [None]:
# Decoder half: two 20-filter 3x3 convolutions on top of encoder_output,
# followed by a single-filter 3x3 convolution that produces the output map.
decoding = layers.Conv2D(20, (3, 3), activation = 'relu', strides = 1, padding = 'same')(encoder_output)
# Optional up-sampling stage (disabled, mirrors the disabled pooling in the encoder).
#decoding = layers.UpSampling2D((2, 2))(decoding)
decoding = layers.Conv2D(20, (3, 3), activation = 'relu', strides = 1, padding = 'same')(decoding)

# Output layer uses relu, so outputs are non-negative and unbounded above.
decoder_output = layers.Conv2D(1, (3, 3), activation = 'relu', strides = 1, padding = 'same')(decoding)

In [None]:
# Full de-noising model from encoder input to decoder output.
# This rebinds the global `model`, replacing any model built in an earlier cell.
model = tf.keras.Model(encoder_input, decoder_output)
model.summary()

In [None]:
# Compile with Adam and binary cross-entropy.
# NOTE(review): the de-noising decoder output layer uses a relu activation
# (not bounded to [0, 1]) while the loss is binary cross-entropy -- confirm
# this pairing is intended.
model.compile(optimizer = 'adam', loss = 'binary_crossentropy')
# Alternative: mean-squared-error loss (disabled).
#model.compile(optimizer = 'adam', loss = 'mse')

# Training

Partition the dataset into train/val/test sets.

In [None]:
# Split the ZeroBias events into train / validation / test = 80% / 10% / 10%.
X = ZeroBias

train_ratio = 0.8
val_ratio = 0.10
test_ratio = 1 - train_ratio - val_ratio
# First split off the test set as a fraction of the full sample ...
X_train_val, X_test = train_test_split(X, test_size = test_ratio, random_state = 1234)
# ... then split the remainder; the validation fraction is re-expressed
# relative to the remaining (train + val) events.
X_train, X_val = train_test_split(X_train_val, test_size = val_ratio/(val_ratio + train_ratio), random_state = 1234)

!!! Only for de-noising model. Preparation of noisy and clean training data.

In [None]:
# Build the de-noising training pairs: random "towers" are injected into the
# first half of the events; the noisy input is 3*X + towers and the clean
# target is X + towers.
np.random.seed(0)

RandomTowers = np.zeros((ZeroBias.shape[0], 18, 14, 1))

# Number of events that receive injected towers (first half of the sample).
n_injected = np.int32(RandomTowers.shape[0]/2)

# Draw order is kept (phi1, eta1, pt1, phi2, eta2, pt2) so the seeded random
# stream yields the same values. Upper bounds leave room for the "+1" offsets.
random_phi1 = np.random.randint(low = 1, high = 17, size = n_injected)
random_eta1 = np.random.randint(low = 1, high = 13, size = n_injected)
random_pt1 = np.random.randint(low = 20, high = 100, size = n_injected)
random_phi2 = np.random.randint(low = 1, high = 17, size = n_injected)
random_eta2 = np.random.randint(low = 1, high = 13, size = n_injected)
random_pt2 = np.random.randint(low = 20, high = 100, size = n_injected)

# Four regions are set per event. Every event is addressed exactly once per
# statement, so the statement order reproduces the per-event assignment order
# (later writes win when two positions coincide).
event_rows = np.arange(n_injected)
RandomTowers[event_rows, random_phi1, random_eta1, 0] = random_pt1/1023
RandomTowers[event_rows, random_phi1+1, random_eta1+1, 0] = random_pt2/1023
RandomTowers[event_rows, random_phi2, random_eta2, 0] = random_pt2/1023
RandomTowers[event_rows, random_phi2+1, random_eta2, 0] = random_pt1/1023

Xnoise = 3*X+RandomTowers
Xclean = X+RandomTowers

train_ratio = 0.8
val_ratio = 0.10
test_ratio = 1 - train_ratio - val_ratio
val_fraction_of_rest = val_ratio/(val_ratio + train_ratio)

# The same random_state is used for both arrays so that noisy and clean
# events end up in matching partitions.
Xnoise_train_val, Xnoise_test = train_test_split(Xnoise, test_size = test_ratio, random_state = 123)
Xnoise_train, Xnoise_val = train_test_split(Xnoise_train_val, test_size = val_fraction_of_rest, random_state = 123)

Xclean_train_val, Xclean_test = train_test_split(Xclean, test_size = test_ratio, random_state = 123)
Xclean_train, Xclean_val = train_test_split(Xclean_train_val, test_size = val_fraction_of_rest, random_state = 123)

!!! Only for de-noising model. Plot and compare the noisy and clean training data.

In [None]:
# Show ten training events: noisy input (left) next to its clean target (right).
for i in range(230,240):
    # plt.figure (not plt.subplots): plt.subplots also creates one full-size
    # axes, which recent matplotlib keeps and draws behind the panels.
    fig = plt.figure(figsize = (10,10))
    # Both panels share the colour scale of the noisy event.
    vmax = Xnoise_train[i,:,:,0].max()
    for position, (title, events) in enumerate((('X_noise', Xnoise_train), ('X_clean', Xclean_train)), start = 1):
        ax = plt.subplot(2, 2, position)
        ax = sns.heatmap(events[i,:,:,0].reshape(18, 14), vmin = 0, vmax = vmax, cmap = "Reds", cbar_kws = {'label': 'Normalized ET'})
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(title)

    plt.show()

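Run the training. Make sure the target (the second argument of `fit` and of `validation_data`) matches the task: the input itself when training for reconstruction, or a different target (e.g. the clean sample for the de-noising model) otherwise.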

In [None]:
# Train the model to reproduce its own input (target == input).
# NOTE(review): with epochs = 3 the EarlyStopping callback (patience = 10)
# can never trigger -- raise epochs or lower patience if early stopping is wanted.
history = model.fit(X_train, X_train,
                    epochs = 3,
                    validation_data = (X_val, X_val),
                    batch_size = 1024,
                    callbacks = [
                        tf.keras.callbacks.EarlyStopping(monitor = "val_loss", patience = 10, mode = "min")
                    ])

Plot loss vs epoch.

In [None]:
# Training and validation loss per epoch, taken from the Keras History object.
plt.figure(figsize = (15,10))
axes = plt.subplot(2, 2, 1)
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    axes.plot(history.history[history_key], label = curve_label)
axes.set_xlabel('Epoch')
axes.set_ylabel('Loss')
axes.legend(loc = "upper right")

# Saving/loading trained models

In [None]:
# Save the current `model` as the teacher (overwrites an existing save at this path).
model.save('saved_models/teacher/')

In [None]:
# Save the student model; `model_student` must have been built (and trained) first.
model_student.save('saved_models/student_quantized/')

In [None]:
# Reload the teacher; this rebinds the global `model` used by the cells below.
model = tf.keras.models.load_model('saved_models/teacher')
model.summary()

In [None]:
# Reload the saved student model.
# NOTE(review): if this save contains QKeras layers, load_model presumably
# needs the matching custom_objects -- verify.
model_student = tf.keras.models.load_model('saved_models/student_quantized')
model_student.summary()

In [None]:
# Save the AutoQKeras-selected model; `qmodel` must exist before running this cell.
qmodel.save('saved_models/qmodel1/')

In [None]:
# Reload the saved quantized model.
# NOTE(review): QKeras layers presumably require custom_objects on load -- verify.
qmodel = tf.keras.models.load_model('saved_models/qmodel1')
qmodel.summary()

In [None]:
# Save the current `model` under the 'compare_flatbig100' comparison tag.
model.save('saved_models/compare_flatbig100/')

In [None]:
# Load a comparison model; rebinds the global `model`.
# NOTE(review): this loads 'compare_flatbig120' while the preceding save cell
# writes 'compare_flatbig100' -- confirm the intended directory.
model = tf.keras.models.load_model('saved_models/compare_flatbig120')
model.summary()

# Loss distribution

Feed all datasets into the trained model to compute prediction outputs.

In [None]:
# Run the trained model on the ZeroBias train/test sets and on every MC sample.
X_train_predict = model.predict(X_train)
X_test_predict = model.predict(X_test)
MC_zb_predict = [model.predict(sample) for sample in MC_zb]

Define loss function to use for inference.

In [None]:
def custom_loss_for_pred(y_true, y_pred, choice):
    """Return one anomaly score per event for the selected loss.

    This cell redefines the function of the same name from the VAE section;
    it supports the same choices (including BCE) so that re-running cells in
    a different order does not silently remove a loss option.

    Parameters
    ----------
    y_true, y_pred : array-like
        Meaning depends on ``choice`` (see below). The first axis indexes events.
    choice : int
        0 -- MSE between ``y_true`` and ``y_pred``, averaged over axes 1-3.
        1 -- mean of ``y_pred**2`` over axes 1-3 (de-noising model; ``y_true``
             is ignored).
        2 -- "radius": Euclidean norm of ``y_pred`` along axis 1 (``y_true`` is
             ignored).
        3 -- KL term ``-0.5 * sum(1 + y_pred - y_true**2 - exp(y_pred))`` along
             axis 1 (``y_true`` plays the role of the mean, ``y_pred`` of the
             log-variance).
        4 -- Keras binary cross-entropy, summed over axes 1 and 2 of the Keras
             result.

    Returns
    -------
    Array with one score per event.

    Raises
    ------
    ValueError
        If ``choice`` is not one of 0-4 (previously ``None`` was returned).
    """
    #MSE
    if choice == 0:
        return np.mean((y_true - y_pred)**2, axis = (1, 2, 3))

    #MSE for de-noising model
    if choice == 1:
        return np.mean(y_pred**2, axis = (1, 2, 3))

    #VAE radius loss
    if choice == 2:
        return np.sqrt(np.sum(y_pred**2, axis = 1))

    #VAE KL loss
    if choice == 3:
        return -0.5 * np.sum(1.0 + y_pred - y_true**2 - np.exp(y_pred), axis = 1)

    #BCE loss
    if choice == 4:
        return np.sum(keras.losses.binary_crossentropy(y_true, y_pred), axis = (1, 2))

    raise ValueError("custom_loss_for_pred: unknown choice {!r} (expected 0-4)".format(choice))

Compute loss for all samples.

In [None]:
# Loss used as the anomaly score (see custom_loss_for_pred for the choices).
loss_choice = 0

X_train_loss = custom_loss_for_pred(X_train, X_train_predict, loss_choice)
# The distillation fits further down use X_val_loss as their validation
# target, but it was never computed in this section; derive it here from the
# same model and the same loss choice.
X_val_loss = custom_loss_for_pred(X_val, model.predict(X_val), loss_choice)
X_test_loss = custom_loss_for_pred(X_test, X_test_predict, loss_choice)

MC_zb_loss = [custom_loss_for_pred(sample, MC_zb_predict[k], loss_choice)
              for k, sample in enumerate(MC_zb)]

Plot loss distributions.

In [None]:
# Normalised, log-scale loss histograms: ZeroBias test, MC sample 0 (labelled
# QCD) and MC samples 3..6.
nbins = 40
rmin = 0
rmax = 50
hist_window = (rmin, rmax)
# Add X_train_loss with label 'train (ZeroBias)' here to overlay the training set.
plt.hist(X_test_loss, range = hist_window, bins = nbins, density = 1, log = True, alpha = 0.3, label = 'test (ZeroBias)')
plt.hist(MC_zb_loss[0], range = hist_window, bins = nbins, density = 1, histtype = 'stepfilled', alpha = 0.1, label = 'QCD')
for i in range(3,7):
    plt.hist(MC_zb_loss[i], range = hist_window, bins = nbins, density = 1, histtype = 'step', label = MC_files[i])
# Legend is placed outside the axes, to the right.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel("loss")
plt.show()

# Comparison between original and reconstructed inputs

In [None]:
#Original vs Reconstructed
# Shows, for ten events, the input, the model output and their absolute
# difference side by side, and prints the loss of each event.
#show_ZB = True
show_ZB = False
n = 5

# Pick the sample once instead of branching separately for every panel.
if show_ZB:
    sample_label, originals, reconstructions, losses = 'ZB test', X_test, X_test_predict, X_test_loss
else:
    sample_label, originals, reconstructions, losses = str(MC_files[n]), MC_zb[n], MC_zb_predict[n], MC_zb_loss[n]

for i in range(780,790):
    # plt.figure (not plt.subplots): plt.subplots also creates one full-size
    # axes, which recent matplotlib keeps and draws behind the 3x3 panels.
    fig = plt.figure(figsize = (17,17))
    print(sample_label + '\nloss = ' + str(losses[i]))

    original = originals[i,:,:,0].reshape(18, 14)
    reconstructed = reconstructions[i,:,:,0].reshape(18, 14)
    # All three panels share the colour scale of the original event.
    vmax = originals[i,:,:,0].max()

    panels = (('Original', original),
              ('Reconstructed', reconstructed),
              ('abs(original-reconstructed)', np.absolute(reconstructed - original)))
    for position, (title, image) in enumerate(panels, start = 1):
        ax = plt.subplot(3, 3, position)
        ax = sns.heatmap(image, vmin = 0, vmax = vmax, cmap = "Reds", cbar_kws = {'label': 'ET (GeV)'})
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(title)
    plt.show()

# Knowledge Distillation (+ quantizing with QKeras)

In [None]:
from qkeras import *

Construct student model without quantization.

In [None]:
# Un-quantized student: flatten -> Dense(15, no bias) -> BatchNorm -> ReLU ->
# Dense(1). The single output is regressed against a per-event target with MSE.
x_in = layers.Input(shape = (18, 14, 1), name = "In")
x = layers.Flatten(name = "Flatten")(x_in)
# No bias here; the BatchNormalization that follows provides the offset.
x = layers.Dense(15, use_bias = False, name = "Dense_1")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation('relu', name = "Activation_1")(x)
x = layers.Dense(1, name = "Out")(x)

model_student = tf.keras.models.Model(x_in, x)
model_student.summary()
model_student.compile(optimizer = 'adam', loss = 'mse')

Construct student model with pre-defined quantization configuration.

In [None]:
#For quantization-aware training
x_in = layers.Input(shape = (18, 14, 1))
x = layers.Flatten()(x_in)
x = QDense(26,
           kernel_quantizer = quantized_bits(10, 5, 1),
           bias_quantizer = quantized_bits(6, 3, 1))(x)
x = QActivation('quantized_relu(bits=10, integer=5)')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(rate = 0.3)(x)
x = QDense(1,
           kernel_quantizer = quantized_bits(10, 5, 1),
           bias_quantizer = quantized_bits(6, 3, 1))(x)
x = QActivation('quantized_relu(bits=10, integer=6)')(x)

model_student = tf.keras.models.Model(x_in, x)
model_student.summary()
model_student.compile(optimizer = 'adam', loss = 'mse')

Train the student model with knowledge distilled from a pre-trained teacher model.

In [None]:
# Distillation: the student regresses the per-event loss values
# (X_train_loss, computed in "Compute loss for all samples") directly from
# the raw input.
# NOTE(review): X_val_loss is not assigned in the visible part of the
# notebook -- make sure it is computed before running this cell.
history_student = model_student.fit(X_train, X_train_loss,
                                    epochs = 50,
                                    validation_data = (X_val, X_val_loss),
                                    batch_size = 1024,
                                    callbacks = [
                                        # Enable after running the TensorBoard cell.
                                        #tensorboard_callback,
                                        tf.keras.callbacks.EarlyStopping(monitor = "val_loss", patience = 5, mode = "min")
                                    ])

Plot loss vs epoch.

In [None]:
# Student training and validation loss per epoch.
plt.figure(figsize = (15,10))
axes = plt.subplot(2, 2, 1)
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    axes.plot(history_student.history[history_key], label = curve_label)
axes.set_xlabel('Epoch')
axes.set_ylabel('Loss')
axes.legend(loc = "upper right")

Feed all datasets into the trained model to compute prediction outputs.

In [None]:
# Student outputs (predicted scores) for the ZeroBias sets and every MC sample.
X_train_predict_student = model_student.predict(X_train)
X_test_predict_student = model_student.predict(X_test)
MC_zb_predict_student = [model_student.predict(sample) for sample in MC_zb]

Plot loss distributions.

In [None]:
# Normalised histograms of the student-predicted score for the ZeroBias
# train/test sets, MC samples 0 and 1 (labelled QCD and SingleNu) and MC
# samples 40..49.
nbins = 40
rmin = 0
rmax = 70
plt.hist(X_train_predict_student, density = 1, bins = nbins, alpha = 0.3, label = 'train (ZeroBias)', range = (rmin, rmax), log = True)
plt.hist(X_test_predict_student, density = 1, bins = nbins, alpha = 0.3, label = 'test (ZeroBias)', range = (rmin, rmax))
plt.hist(MC_zb_predict_student[0], density = 1, bins = nbins, label = 'QCD', alpha = 0.1, histtype = 'stepfilled', range = (rmin, rmax))
plt.hist(MC_zb_predict_student[1], density = 1, bins = nbins, label = 'SingleNu', alpha = 0.1, histtype = 'stepfilled', range = (rmin, rmax))
# The upper bound is clamped to the samples actually loaded, so the cell
# still runs (and simply draws fewer curves) when fewer than 50 MC files
# are enabled.
last_sample = min(50, len(MC_zb_predict_student), len(MC_files))
for i in range(40, last_sample):
    plt.hist(MC_zb_predict_student[i], density = 1, bins = nbins, label = MC_files[i], histtype = 'step', range = (rmin, rmax))
# Legend is placed outside the axes, to the right.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xlabel("loss")
plt.show()

# Knowledge Distillation (+ quantizing with AutoQKeras)

In [None]:
from qkeras import *
from qkeras.autoqkeras import *

In [None]:
def metric_for_autoqk(y_true, y_pred):
    """Return 1.5 minus the mean squared error along the last axis.

    Used as the AutoQKeras search metric: a smaller squared error gives a
    larger value.
    """
    squared_error = tf.square(y_true - y_pred)
    return 1.5 - tf.reduce_mean(squared_error, axis = -1)

In [None]:
# Float seed model handed to AutoQKeras. Layer order (index: name) is
# 0: In, 1: Flatten, 2: Dense_1, 3: Activation_1, 4: QBN_1, 5: Out --
# run_config["layer_indexes"] refers to these positions.
x_in = layers.Input(shape=(18,14,1),name="In")
x = layers.Flatten(name="Flatten")(x_in)
x = layers.Dense(15,use_bias=False,name="Dense_1")(x)
x = layers.Activation('relu',name="Activation_1")(x)
x = QBatchNormalization(name="QBN_1")(x)
x = layers.Dense(1,use_bias=False,name="Out")(x)

qmodel_original = tf.keras.models.Model(x_in, x)
qmodel_original.summary()
# metric_for_autoqk is attached as a metric here and is also passed to AutoQKeras.
qmodel_original.compile(optimizer = 'adam', loss = 'mse', metrics = [metric_for_autoqk])

In [None]:
# Candidate quantizers for the AutoQKeras search. Each key is a quantizer
# specification string; the associated value equals the first (bits) argument
# of that quantizer.
quantization_config = {
        "kernel": {
                "quantized_bits(2,0,1,alpha=1.0)": 2,
                "quantized_bits(2,1,1,alpha=1.0)": 2,
                "quantized_bits(3,0,1,alpha=1.0)": 3,
                "quantized_bits(3,1,1,alpha=1.0)": 3,
                "quantized_bits(4,0,1,alpha=1.0)": 4,
                "quantized_bits(4,1,1,alpha=1.0)": 4,
                "quantized_bits(4,2,1,alpha=1.0)": 4,
                "quantized_bits(5,0,1,alpha=1.0)": 5,
                "quantized_bits(5,2,1,alpha=1.0)": 5,
                "quantized_bits(6,0,1,alpha=1.0)": 6,
                "quantized_bits(6,2,1,alpha=1.0)": 6
        },
        "activation": {
                "quantized_relu(2,0)": 2,
                "quantized_relu(2,1)": 2,
                "quantized_relu(3,0)": 3,
                "quantized_relu(3,1)": 3,
                "quantized_relu(4,0)": 4,
                "quantized_relu(4,1)": 4,
                "quantized_relu(5,0)": 5,
                "quantized_relu(5,2)": 5,
                "quantized_relu(6,2)": 6
        }
}

# Maximum bit widths allowed per layer type.
limit = {
    "Dense": [6,6,6], # format for Dense is max bits for [kernel,bias,activation]
    "Activation": [6] # format for Activation is max bits for [activation]
    # BatchNormalization is left without a limit (disabled entry):
    #"BatchNormalization": []
}

# Search objective: model size in bits. The commented-out entries are only
# relevant for the alternative "energy" goal.
goal = {
    "type": "bits", # energy, bits
    "params": {
        "delta_p": 8.0,
        "delta_n": 8.0,
        "rate": 2.0,
        "stress": 1.0,
        #"process": "horowitz",
        #"parameters_on_memory": ["sram", "sram"],
        #"activations_on_memory": ["sram", "sram"],
        #"rd_wr_on_io": [False, False],
        #"min_sram_size": [0, 0],
        #"source_quantizers": ["quantized_bits(bits=10,integer=10,symmetric=0,keep_negative=False)"],
        #"reference_internal": "int8",
        #"reference_accumulator": "int32"
        "input_bits": 8,
        "output_bits": 8,
        "ref_bits": 8,
        "config": {
            "default": ["parameters", "activations"]
        }
    }
}

# Keyword arguments forwarded to AutoQKeras(...).
run_config = {
  "output_dir": "run_config",
  "goal": goal,
  "quantization_config": quantization_config,
  "learning_rate_optimizer": False, # False since still experimental
  "transfer_weights": False, # False for the #filters/neurons to float
  "mode": "hyperband", # random/bayesian/hyperband
  "seed": 123,
  "limit": limit,
  "tune_filters": "none", # layer/block/none(no filter tuning at all)
  "tune_filters_exceptions": "",
  # Alternative: quantize every layer after the first two (disabled).
  #"layer_indexes": range(1 + 1, len(qmodel_original.layers))
  # Positions of Dense_1, Activation_1 and Out in qmodel_original.layers.
  "layer_indexes": (2,3,5)
}

# Sanity check: print the names of the layers selected for quantization.
print("quantizing layers:", [qmodel_original.layers[i].name for i in run_config["layer_indexes"]])

In [None]:
# Run the AutoQKeras quantization search on the seed model, regressing the
# per-event loss values (X_train_loss) from the raw input.
# NOTE(review): X_val_loss is not assigned in the visible part of the
# notebook -- make sure it is computed before running this cell.
autoqk = AutoQKeras(qmodel_original, metrics=[metric_for_autoqk], custom_objects={}, **run_config)
autoqk.fit(X_train, X_train_loss, validation_data=(X_val, X_val_loss), batch_size=1024, epochs=15)
# How the search scores a trial (delta_p, delta_n and rate come from goal["params"]):
# i = log(reference_size / trial_size) / log(rate)
# delta = i * ( (i < 0) * delta_n + (i >= 0) * delta_p )
# objective to maximize in the search is
# adjusted score =  metric * (1 + delta), as formulated in the paper

In [None]:
# Retrieve the best model found by the AutoQKeras search.
qmodel = autoqk.get_best_model()

In [None]:
# Store the weights of qmodel in qmodel1.h5 (current working directory).
qmodel.save_weights("qmodel1.h5")

In [None]:
# Load weights from qmodel1.h5 into the existing qmodel object.
qmodel.load_weights("qmodel1.h5")

In [None]:
# Re-compile the selected quantized model with plain MSE and train it for
# 50 epochs on the per-event loss targets (no early stopping here).
# NOTE(review): X_val_loss is not assigned in the visible part of the
# notebook -- make sure it is computed before running this cell.
qmodel.compile(optimizer='adam', loss="mse")
history_qmodel = qmodel.fit(X_train, X_train_loss, epochs=50, batch_size=1024, validation_data=(X_val, X_val_loss))

In [None]:
# Quantized-model training and validation loss per epoch.
plt.figure(figsize = (15,10))
axes = plt.subplot(2, 2, 1)
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    axes.plot(history_qmodel.history[history_key], label = curve_label)
axes.set_xlabel('Epoch')
axes.set_ylabel('Loss')
axes.legend(loc = "upper right")

In [None]:
# Quantized-model outputs for the ZeroBias sets and every MC sample.
X_train_predict_qmodel = qmodel.predict(X_train)
X_test_predict_qmodel = qmodel.predict(X_test)
MC_zb_predict_qmodel = [qmodel.predict(sample) for sample in MC_zb]

In [None]:
# Normalised histograms of the quantized-model score: ZeroBias test set vs.
# MC samples 3, 4 and 5 (labels as given below).
nbins = 20
rmin = 0
rmax = 7
plt.hist(X_test_predict_qmodel, density = 1, bins = nbins, alpha = 0.3, label = 'test (ZeroBias)', range = (rmin, rmax), log = False)
# Samples 0 and 1 (labelled QCD and SingleNu elsewhere) and the ZeroBias
# training set are not drawn here.
plt.hist(MC_zb_predict_qmodel[3], density = 1, bins = nbins, label = 'SM HH->4b', histtype = 'step', range = (rmin, rmax))
plt.hist(MC_zb_predict_qmodel[4], density = 1, bins = nbins, label = 'ZPrime->qq', histtype = 'step', range = (rmin, rmax))
plt.hist(MC_zb_predict_qmodel[5], density = 1, bins = nbins, label = 'H->2LongLived->4b', histtype = 'step', range = (rmin, rmax))
# The former `for i in range(3,3)` loop was removed: its range is empty, so
# it never drew anything. Add further samples explicitly, as above.
plt.legend(loc='upper right')
plt.xlabel("loss")
plt.show()

# TensorBoard (less useful)

In [None]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard
# WARNING: deletes all existing logs under ./logs/ before a fresh run.
# NOTE(review): written without a leading "!", so this presumably relies on
# IPython automagic/aliases -- confirm it runs in the target environment.
rm -rf ./logs/
# Callback to pass to model.fit(..., callbacks = [...]); writes to logs/fit.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = "logs/fit", histogram_freq = 1)
# Open the TensorBoard UI inline on the same log directory.
%tensorboard --logdir logs/fit

# ROC plotting

### Assigning labels and arranging for ROC plotting

In [None]:
#Baseline
#Reference classifier that pretends only the mean ZeroBias event was learned:
#the "reconstruction" is the mean ZB event regardless of the input, and the
#score is MSE(input, ZeroBias_mean).
ZeroBias_mean = np.mean(ZeroBias, axis = 0)

baseline_zb = np.mean((X_test - ZeroBias_mean)**2, axis = (1, 2))
baseline_mc = [np.mean((sample - ZeroBias_mean)**2, axis = (1, 2)) for sample in MC_zb]

In [None]:
#Assign labels for various signals (y = 1) and backgrounds (y = 0)
Y_zb = np.zeros((X_test.shape[0], 1))
Y_mc = [np.ones((mc_sample.shape[0], 1)) for mc_sample in MC_zb]

#Concatenate datasets to make ROC curves, i.e. QCD/SingleNu/signals vs ZB
#Each list holds one array per MC sample: the MC rows first, then the ZB test rows
#The lists are built directly from the MC_zb-derived inputs so their lengths always agree

#True labels
Y_true = [np.concatenate((labels_mc, Y_zb)) for labels_mc in Y_mc]
#Baseline scores
Y_baseline = [np.concatenate((scores_mc, baseline_zb)) for scores_mc in baseline_mc]
#Model scores
Y_model = [np.concatenate((loss_mc, X_test_loss)) for loss_mc in MC_zb_loss]
#Student model scores
#NOTE(review): kept empty as before; the student ROC cell indexes Y_student[i],
#so enable the line below once the student predictions are available
Y_student = []
#Y_student = [np.concatenate((pred_mc, X_test_predict_student)) for pred_mc in MC_zb_predict_student]
#Qmodel scores (restored: the quantized student ROC cell reads Y_qmodel[i])
Y_qmodel = [np.concatenate((pred_mc, X_test_predict_qmodel)) for pred_mc in MC_zb_predict_qmodel]

### Baseline ROC

In [None]:
# ROC curves of the baseline score, one per MC sample vs ZeroBias.
# fpr_/tpr_/thresholds_/roc_auc_baseline keep one entry per sample.
plt.figure(figsize = (13, 13))
axes = plt.subplot(2, 2, 1)
fpr_baseline = []
tpr_baseline = []
thresholds_baseline = []
roc_auc_baseline = []
for i, (y_true_i, y_score_i) in enumerate(zip(Y_true, Y_baseline)):
    # roc_curve returns fresh arrays, so nothing needs to be pre-allocated.
    fpr_i, tpr_i, thresholds_i = roc_curve(y_true_i, y_score_i)
    auc_i = auc(fpr_i, tpr_i)
    fpr_baseline.append(fpr_i)
    tpr_baseline.append(tpr_i)
    thresholds_baseline.append(thresholds_i)
    roc_auc_baseline.append(auc_i)
    if i in (1, 2):
        # Samples 1 and 2 are stored (the TPR table reads them) but not drawn.
        #axes.plot(fpr_i, tpr_i, linestyle = ':', lw = 1, label = MC_files[i] + ' (AUC = %.8f)' % (auc_i))
        continue
    # Sample 0 is drawn dashed red, every remaining sample as a solid line.
    curve_style = {'linestyle': '--', 'color': 'r'} if i == 0 else {'linestyle': '-'}
    axes.plot(fpr_i, tpr_i, lw = 1, label = MC_files[i] + ' (AUC = %.8f)' % (auc_i), **curve_style)
#axes.plot([0, 1], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'random chance')
# Vertical reference line at FPR = 0.002.
axes.plot([0.002, 0.002], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'FPR = 0.2% ~ (100 kHz)/(ZB rate)')
axes.set_xlim([0.00001, 1.0])
#axes.set_xlim([0, 1.0])
axes.set_ylim([0, 1.0])
#axes.set_ylim([0.9, 1.0])
axes.set_xscale(value = "log")
#axes.set_yscale(value = "log")
axes.set_xlabel('False Positive Rate (FPR)')
axes.set_ylabel('True Positive Rate (TPR)')
axes.set_title('Baseline ROC')
axes.legend(loc='center left', bbox_to_anchor = (1.15, 0.5))
plt.show()

### Teacher model ROC

In [None]:
# ROC curves of the teacher-model score (Y_model), one per MC sample vs ZeroBias.
# fpr_/tpr_/thresholds_/roc_auc_model keep one entry per sample.
plt.figure(figsize = (13, 13))
axes = plt.subplot(2, 2, 1)
fpr_model = []
tpr_model = []
thresholds_model = []
roc_auc_model = []
for i, (y_true_i, y_score_i) in enumerate(zip(Y_true, Y_model)):
    # roc_curve returns fresh arrays, so nothing needs to be pre-allocated.
    fpr_i, tpr_i, thresholds_i = roc_curve(y_true_i, y_score_i)
    auc_i = auc(fpr_i, tpr_i)
    fpr_model.append(fpr_i)
    tpr_model.append(tpr_i)
    thresholds_model.append(thresholds_i)
    roc_auc_model.append(auc_i)
    if i in (1, 2):
        # Samples 1 and 2 are stored (the TPR table reads them) but not drawn.
        #axes.plot(fpr_i, tpr_i, linestyle = ':', lw = 1, label = MC_files[i] + ' (AUC = %.8f)' % (auc_i))
        continue
    # Sample 0 is drawn dashed red, every remaining sample as a solid line.
    curve_style = {'linestyle': '--', 'color': 'r'} if i == 0 else {'linestyle': '-'}
    axes.plot(fpr_i, tpr_i, lw = 1, label = MC_files[i] + ' (AUC = %.8f)' % (auc_i), **curve_style)
#axes.plot([0, 1], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'random chance')
# Vertical reference line at FPR = 0.002.
axes.plot([0.002, 0.002], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'FPR = 0.2% ~ (100 kHz)/(ZB rate)')
axes.set_xlim([0.00001, 1.0])
#axes.set_xlim([0, 1.0])
axes.set_ylim([0, 1.0])
#axes.set_ylim([0.9, 1.0])
axes.set_xscale(value = "log")
#axes.set_yscale(value = "log")
axes.set_xlabel('False Positive Rate (FPR)')
axes.set_ylabel('True Positive Rate (TPR)')
axes.set_title('Model ROC')
axes.legend(loc='center left', bbox_to_anchor = (1.15, 0.5))
plt.show()

### Student model ROC

In [None]:
# ROC curves of the student-model score (Y_student), one per MC sample vs ZeroBias.
# fpr_/tpr_/thresholds_/roc_auc_student keep one entry per sample.
if len(Y_student) != len(Y_true):
    # Fail with a clear message instead of an IndexError deep inside the loop.
    raise ValueError('Y_student has %d entries but Y_true has %d; fill Y_student before plotting the student ROC'
                     % (len(Y_student), len(Y_true)))

plt.figure(figsize = (13, 13))
axes = plt.subplot(2, 2, 1)
fpr_student = []
tpr_student = []
thresholds_student = []
roc_auc_student = []
for i, (y_true_i, y_score_i) in enumerate(zip(Y_true, Y_student)):
    # roc_curve returns fresh arrays, so nothing needs to be pre-allocated.
    fpr_i, tpr_i, thresholds_i = roc_curve(y_true_i, y_score_i)
    auc_i = auc(fpr_i, tpr_i)
    fpr_student.append(fpr_i)
    tpr_student.append(tpr_i)
    thresholds_student.append(thresholds_i)
    roc_auc_student.append(auc_i)
    # Sample 0: dashed red; samples 1 and 2: dotted; everything else: solid.
    if i == 0:
        curve_style = {'linestyle': '--', 'color': 'r'}
    elif i in (1, 2):
        curve_style = {'linestyle': ':'}
    else:
        curve_style = {'linestyle': '-'}
    axes.plot(fpr_i, tpr_i, lw = 1, label = MC_files[i] + ' (AUC = %.8f)' % (auc_i), **curve_style)
#axes.plot([0, 1], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'random chance')
# Vertical reference line at FPR = 0.002.
axes.plot([0.002, 0.002], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'FPR = 0.2% ~ (100 kHz)/(ZB rate)')
axes.set_xlim([0.0001, 1.0])
#axes.set_xlim([0, 1.0])
axes.set_ylim([0, 1.0])
#axes.set_ylim([0.1, 1.0])
axes.set_xscale(value = "log")
#axes.set_yscale(value = "log")
axes.set_xlabel('False Positive Rate (FPR)')
axes.set_ylabel('True Positive Rate (TPR)')
axes.set_title('Student model ROC')
axes.legend(loc='center left', bbox_to_anchor = (1.15, 0.5))
plt.show()

### Quantized student model ROC

In [None]:
# ROC curves of the quantized-student score (Y_qmodel) vs ZeroBias.
# ROC points are computed and stored for every MC sample; only the three
# signal samples at indices 3, 4 and 5 are drawn.
if len(Y_qmodel) != len(Y_true):
    # Fail with a clear message instead of an IndexError deep inside the loop.
    raise ValueError('Y_qmodel has %d entries but Y_true has %d; fill Y_qmodel before plotting the quantized ROC'
                     % (len(Y_qmodel), len(Y_true)))

plt.figure(figsize = (13, 13))
axes = plt.subplot(2, 2, 1)
fpr_qmodel = []
tpr_qmodel = []
thresholds_qmodel = []
roc_auc_qmodel = []
for y_true_i, y_score_i in zip(Y_true, Y_qmodel):
    # roc_curve returns fresh arrays, so nothing needs to be pre-allocated.
    fpr_i, tpr_i, thresholds_i = roc_curve(y_true_i, y_score_i)
    fpr_qmodel.append(fpr_i)
    tpr_qmodel.append(tpr_i)
    thresholds_qmodel.append(thresholds_i)
    roc_auc_qmodel.append(auc(fpr_i, tpr_i))

# (index into the per-sample ROC lists, legend name) of the curves to draw.
qmodel_curves = [(3, 'SM HH->4b'),
                 (4, 'ZPrime->qq'),
                 (5, 'H->2LongLived->4b')]
for sig_idx, sig_name in qmodel_curves:
    axes.plot(fpr_qmodel[sig_idx], tpr_qmodel[sig_idx], linestyle = '-', lw = 2, label = sig_name + ' (AUC = %.5f)' % (roc_auc_qmodel[sig_idx]))
#axes.plot([0, 1], [0, 1], linestyle = '--', lw = 2, color = 'black', label = 'random chance')
# Vertical reference line at FPR = 0.002.
axes.plot([0.002, 0.002], [0, 1], linestyle = '--', lw = 2, color = 'black', label = 'FPR = 0.2% ~ (100 kHz)/(ZB rate)')
axes.set_xlim([0.00001, 1.0])
#axes.set_xlim([0, 1.0])
axes.set_ylim([0.0, 1.0])
#axes.set_ylim([0.1, 1.0])
axes.set_xscale(value = "log")
#axes.set_yscale(value = "log")
axes.set_xlabel('False Positive Rate (FPR)')
axes.set_ylabel('True Positive Rate (TPR)')
axes.set_title('Quantized student model ROC')
#axes.legend(loc='center left', bbox_to_anchor = (1.15, 0.5))
axes.legend(loc='center left', bbox_to_anchor = (0.33, 0.5))
plt.show()

### Tabulating TPR at fixed FPR = 0.2% (baseline, teacher, student, quantized student, and the quantized - student change).

In [None]:
def _tpr_percent_at_fpr(fpr, tpr, fpr_target = 0.002):
    """Return the TPR, in percent, at the first ROC point whose FPR exceeds fpr_target.

    fpr must be sorted in increasing order (as returned by roc_curve).
    Raises IndexError when no ROC point lies above fpr_target.
    """
    # side = 'right' yields the first index with fpr[index] > fpr_target.
    first_above = np.searchsorted(fpr, fpr_target, side = 'right')
    return tpr[first_above] * 100

# One TPR value per MC sample and per classifier, all read at the same working point.
table_tpr_baseline = [_tpr_percent_at_fpr(f, t) for f, t in zip(fpr_baseline, tpr_baseline)]
table_tpr_model = [_tpr_percent_at_fpr(f, t) for f, t in zip(fpr_model, tpr_model)]
table_tpr_student = [_tpr_percent_at_fpr(f, t) for f, t in zip(fpr_student, tpr_student)]
table_tpr_qmodel = [_tpr_percent_at_fpr(f, t) for f, t in zip(fpr_qmodel, tpr_qmodel)]

# Change in TPR (percentage points) when going from the student to the quantized student.
#table_tpr_change.append(100 * (table_tpr_model[i] - table_tpr_baseline[i])/table_tpr_baseline[i])
table_tpr_change = [tpr_q - tpr_s for tpr_s, tpr_q in zip(table_tpr_student, table_tpr_qmodel)]

table_tpr = pd.DataFrame({'Baseline': table_tpr_baseline,
                          'CNN AE (teacher)': table_tpr_model,
                          'Dense (student)': table_tpr_student,
                          'qDense (qstudent)': table_tpr_qmodel,
                          'delta(qDense, Dense)': table_tpr_change},
                        index = MC_files)
table_tpr = table_tpr.sort_values(by = 'delta(qDense, Dense)', ascending = False)

# Show the full sample file names instead of truncating the index column.
pd.set_option('display.max_colwidth', None)
table_tpr

In [None]:
@@@@@@@@@@@@

In [None]:
def _load_and_summarize(model_path):
    """Load the saved Keras model at model_path, print its summary and return it."""
    loaded = tf.keras.models.load_model(model_path)
    loaded.summary()
    return loaded

# Saved autoencoder variants that are compared against each other below.
compare_old = _load_and_summarize('saved_models/compare_old')
compare_old_flat = _load_and_summarize('saved_models/compare_old_flat')
compare_flat40 = _load_and_summarize('saved_models/compare_flat40')
compare_flat80 = _load_and_summarize('saved_models/compare_flat80')
compare_flat120 = _load_and_summarize('saved_models/compare_flat120')
compare_flat160 = _load_and_summarize('saved_models/compare_flat160')
compare_flat200 = _load_and_summarize('saved_models/compare_flat200')
compare_flatbig100 = _load_and_summarize('saved_models/compare_flatbig100')
compare_flatbig120 = _load_and_summarize('saved_models/compare_flatbig120')

In [None]:
# Outputs of every comparison model on the X_test inputs.
X_test_old = compare_old.predict(X_test)
X_test_old_flat = compare_old_flat.predict(X_test)
X_test_flat40 = compare_flat40.predict(X_test)
X_test_flat80 = compare_flat80.predict(X_test)
X_test_flat120 = compare_flat120.predict(X_test)
X_test_flat160 = compare_flat160.predict(X_test)
X_test_flat200 = compare_flat200.predict(X_test)
X_test_flatbig100 = compare_flatbig100.predict(X_test)
X_test_flatbig120 = compare_flatbig120.predict(X_test)

# Outputs of every comparison model on each MC_zb sample; each list follows
# the MC_zb ordering.
(MC_zb_old, MC_zb_old_flat, MC_zb_flat40, MC_zb_flat80, MC_zb_flat120,
 MC_zb_flat160, MC_zb_flat200, MC_zb_flatbig100, MC_zb_flatbig120) = ([] for _ in range(9))
for mc_sample in MC_zb:
    MC_zb_old.append(compare_old.predict(mc_sample))
    MC_zb_old_flat.append(compare_old_flat.predict(mc_sample))
    MC_zb_flat40.append(compare_flat40.predict(mc_sample))
    MC_zb_flat80.append(compare_flat80.predict(mc_sample))
    MC_zb_flat120.append(compare_flat120.predict(mc_sample))
    MC_zb_flat160.append(compare_flat160.predict(mc_sample))
    MC_zb_flat200.append(compare_flat200.predict(mc_sample))
    MC_zb_flatbig100.append(compare_flatbig100.predict(mc_sample))
    MC_zb_flatbig120.append(compare_flatbig120.predict(mc_sample))

In [None]:
# Loss variant forwarded to custom_loss_for_pred for every model below.
loss_choice = 0

def _pred_loss(inputs, outputs):
    """Evaluate custom_loss_for_pred on (inputs, outputs) with the current loss_choice."""
    return custom_loss_for_pred(inputs, outputs, loss_choice)

# Per-event loss of each comparison model on the X_test inputs.
X_test_loss_old = _pred_loss(X_test, X_test_old)
X_test_loss_old_flat = _pred_loss(X_test, X_test_old_flat)
X_test_loss_flat40 = _pred_loss(X_test, X_test_flat40)
X_test_loss_flat80 = _pred_loss(X_test, X_test_flat80)
X_test_loss_flat120 = _pred_loss(X_test, X_test_flat120)
X_test_loss_flat160 = _pred_loss(X_test, X_test_flat160)
X_test_loss_flat200 = _pred_loss(X_test, X_test_flat200)
X_test_loss_flatbig100 = _pred_loss(X_test, X_test_flatbig100)
X_test_loss_flatbig120 = _pred_loss(X_test, X_test_flatbig120)

# Same losses for each MC_zb sample; each list follows the MC_zb ordering.
(MC_zb_loss_old, MC_zb_loss_old_flat, MC_zb_loss_flat40, MC_zb_loss_flat80,
 MC_zb_loss_flat120, MC_zb_loss_flat160, MC_zb_loss_flat200,
 MC_zb_loss_flatbig100, MC_zb_loss_flatbig120) = ([] for _ in range(9))
for k, mc_sample in enumerate(MC_zb):
    MC_zb_loss_old.append(_pred_loss(mc_sample, MC_zb_old[k]))
    MC_zb_loss_old_flat.append(_pred_loss(mc_sample, MC_zb_old_flat[k]))
    MC_zb_loss_flat40.append(_pred_loss(mc_sample, MC_zb_flat40[k]))
    MC_zb_loss_flat80.append(_pred_loss(mc_sample, MC_zb_flat80[k]))
    MC_zb_loss_flat120.append(_pred_loss(mc_sample, MC_zb_flat120[k]))
    MC_zb_loss_flat160.append(_pred_loss(mc_sample, MC_zb_flat160[k]))
    MC_zb_loss_flat200.append(_pred_loss(mc_sample, MC_zb_flat200[k]))
    MC_zb_loss_flatbig100.append(_pred_loss(mc_sample, MC_zb_flatbig100[k]))
    MC_zb_loss_flatbig120.append(_pred_loss(mc_sample, MC_zb_flatbig120[k]))

In [None]:
# Baseline score: squared deviation from the mean ZeroBias event, averaged
# over axes 1 and 2.
ZeroBias_mean = np.mean(ZeroBias, axis = 0)

baseline_zb = np.mean((X_test - ZeroBias_mean)**2, axis = (1, 2))
baseline_mc = [np.mean((mc_sample - ZeroBias_mean)**2, axis = (1, 2)) for mc_sample in MC_zb]

# Labels: 0 for the X_test rows, 1 for the rows of every MC sample.
Y_zb = np.zeros((X_test.shape[0], 1))
n_mc = len(MC)
Y_mc = [np.ones((MC_zb[k].shape[0], 1)) for k in range(n_mc)]

def _mc_then_zb(per_sample_mc, zb_part):
    """For each of the n_mc samples, stack its MC rows on top of the ZeroBias rows."""
    return [np.concatenate((per_sample_mc[k], zb_part)) for k in range(n_mc)]

# One stacked array per MC sample, for the labels and for every classifier score.
Y_true = _mc_then_zb(Y_mc, Y_zb)
Y_baseline = _mc_then_zb(baseline_mc, baseline_zb)
Y_old = _mc_then_zb(MC_zb_loss_old, X_test_loss_old)
Y_old_flat = _mc_then_zb(MC_zb_loss_old_flat, X_test_loss_old_flat)
Y_flat40 = _mc_then_zb(MC_zb_loss_flat40, X_test_loss_flat40)
Y_flat80 = _mc_then_zb(MC_zb_loss_flat80, X_test_loss_flat80)
Y_flat120 = _mc_then_zb(MC_zb_loss_flat120, X_test_loss_flat120)
Y_flat160 = _mc_then_zb(MC_zb_loss_flat160, X_test_loss_flat160)
Y_flat200 = _mc_then_zb(MC_zb_loss_flat200, X_test_loss_flat200)
Y_flatbig100 = _mc_then_zb(MC_zb_loss_flatbig100, X_test_loss_flatbig100)
Y_flatbig120 = _mc_then_zb(MC_zb_loss_flatbig120, X_test_loss_flatbig120)

In [None]:
# Compare the ROC curves of all autoencoder variants on one MC sample vs ZeroBias.
# n indexes the per-sample lists (Y_true, Y_old, ...) and MC_files.
n = 6

plt.figure(figsize = (13, 13))
axes = plt.subplot(2, 2, 1)

# roc_curve / auc return fresh objects, so no pre-allocation is needed.
# NOTE: fpr_baseline, tpr_baseline, thresholds_baseline and roc_auc_baseline are
# single-sample values here and replace the per-sample lists of the same name
# built by the baseline ROC cell.
fpr_baseline, tpr_baseline, thresholds_baseline = roc_curve(Y_true[n], Y_baseline[n])
roc_auc_baseline = auc(fpr_baseline, tpr_baseline)

fpr_old, tpr_old, thresholds_old = roc_curve(Y_true[n], Y_old[n])
roc_auc_old = auc(fpr_old, tpr_old)

fpr_old_flat, tpr_old_flat, thresholds_old_flat = roc_curve(Y_true[n], Y_old_flat[n])
roc_auc_old_flat = auc(fpr_old_flat, tpr_old_flat)

fpr_flat40, tpr_flat40, thresholds_flat40 = roc_curve(Y_true[n], Y_flat40[n])
roc_auc_flat40 = auc(fpr_flat40, tpr_flat40)

fpr_flat80, tpr_flat80, thresholds_flat80 = roc_curve(Y_true[n], Y_flat80[n])
roc_auc_flat80 = auc(fpr_flat80, tpr_flat80)

fpr_flat120, tpr_flat120, thresholds_flat120 = roc_curve(Y_true[n], Y_flat120[n])
roc_auc_flat120 = auc(fpr_flat120, tpr_flat120)

fpr_flat160, tpr_flat160, thresholds_flat160 = roc_curve(Y_true[n], Y_flat160[n])
roc_auc_flat160 = auc(fpr_flat160, tpr_flat160)

fpr_flat200, tpr_flat200, thresholds_flat200 = roc_curve(Y_true[n], Y_flat200[n])
roc_auc_flat200 = auc(fpr_flat200, tpr_flat200)

fpr_flatbig100, tpr_flatbig100, thresholds_flatbig100 = roc_curve(Y_true[n], Y_flatbig100[n])
roc_auc_flatbig100 = auc(fpr_flatbig100, tpr_flatbig100)

fpr_flatbig120, tpr_flatbig120, thresholds_flatbig120 = roc_curve(Y_true[n], Y_flatbig120[n])
roc_auc_flatbig120 = auc(fpr_flatbig120, tpr_flatbig120)

# Common line width for all model curves.
lw=3.5

# The flat120 and flatbig curves are computed above but currently not drawn.
axes.plot(fpr_baseline, tpr_baseline, linestyle = '--', lw = lw, label = 'Cut-flow baseline (AUC = %.4f)' % (roc_auc_baseline))
axes.plot(fpr_old, tpr_old, linestyle = '--', lw = lw, label = 'Old CNN AE, 2D latent space = 9x7 (AUC = %.4f)' % (roc_auc_old))
axes.plot(fpr_flat40, tpr_flat40, linestyle = '-', lw = lw, label = 'CNN AE, flattened latent space = 40 (AUC = %.4f)' % (roc_auc_flat40))
axes.plot(fpr_old_flat, tpr_old_flat, linestyle = '-', lw = lw, label = 'CNN AE, flattened latent space = 63 (AUC = %.4f)' % (roc_auc_old_flat))
axes.plot(fpr_flat80, tpr_flat80, linestyle = '-', lw = lw, label = 'CNN AE, flattened latent space = 80 (AUC = %.4f)' % (roc_auc_flat80))
#axes.plot(fpr_flat120, tpr_flat120, linestyle = '-', lw = lw, label = 'CNN AE, flattened latent space = 120 (AUC = %.4f)' % (roc_auc_flat120))
axes.plot(fpr_flat160, tpr_flat160, linestyle = '-', color = 'blue', lw = lw, label = 'CNN AE, flattened latent space = 160 (AUC = %.4f)' % (roc_auc_flat160))
axes.plot(fpr_flat200, tpr_flat200, linestyle = '-', color = 'pink', lw = lw, label = 'CNN AE, flattened latent space = 200 (AUC = %.4f)' % (roc_auc_flat200))
#axes.plot(fpr_flatbig100, tpr_flatbig100, linestyle = '-', lw = lw, label = 'Big CNN AE, flattened latent space = 100 (AUC = %.5f)' % (roc_auc_flatbig100))
#axes.plot(fpr_flatbig120, tpr_flatbig120, linestyle = '-', lw = lw, label = 'Big CNN AE, flattened latent space = 120 (AUC = %.5f)' % (roc_auc_flatbig120))

# Vertical reference line at FPR = 0.002.
axes.plot([0.002, 0.002], [0, 1], linestyle = '--', lw = 1, color = 'black', label = 'FPR = 0.2% ~ (100 kHz)/(ZB rate)')
axes.set_xlim([0.00001, 1.0])
#axes.set_xlim([0, 1.0])
axes.set_ylim([0., 1.0])
#axes.set_ylim([0.9, 1.0])
axes.set_xscale(value = "log")
#axes.set_yscale(value = "log")
axes.set_xlabel('False Positive Rate (FPR)')
axes.set_ylabel('True Positive Rate (TPR)')
axes.set_title(MC_files[n] + ' vs ZB')
axes.legend(loc='center left', bbox_to_anchor = (0.6, 0.5))
plt.show()