In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import optimizers
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from sklearn.model_selection import train_test_split
from time import time
from os import environ
from visualize import trainingPlots, discPlot, ROC_curve 
import visualize
from tensorflow.keras.utils import plot_model
# import keras.utils as ker_u
#environ['KERAS_BACKEND'] = 'tensorflow'  # needed on Wisconsin cluster
from keras import backend as K
import numpy as np
import keras
from tensorflow.keras.utils import to_categorical 
from keras.layers import Layer
import matplotlib.pyplot as plt
import statistics

In [None]:
def build_model(nvars, model_name,
                model_dir='/localdata/Athar/models',
                log_dir='/localdata/Athar/logs'):
    """Build and compile the binary classifier and its training callbacks.

    The network is a small fully connected stack: Dense(14, relu) ->
    Dense(8, relu) -> Dense(1, sigmoid), compiled with the Adam optimizer,
    binary cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    nvars : int
        Number of input features; used as the input shape ``(nvars,)``.
    model_name : str
        Base name of the checkpoint file, written as
        ``<model_dir>/<model_name>.hdf5``.
    model_dir : str, optional
        Directory that receives the checkpoint file. Defaults to the
        location this notebook has always used.
    log_dir : str, optional
        Parent directory for TensorBoard logs; each call logs into a
        sub-directory named after the current ``time()`` value.

    Returns
    -------
    tuple
        ``(model, callbacks)``: the compiled model and a list holding the
        EarlyStopping, ModelCheckpoint and TensorBoard callbacks, in that
        order.
    """
    model = Sequential()
    model.add(Dense(14, input_shape=(nvars,), name='input', activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, name='output', activation='sigmoid', kernel_initializer='normal'))

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()

    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])

    checkpoint_path = '{}/{}.hdf5'.format(model_dir, model_name)
    tensorboard_dir = '{}/{}'.format(log_dir, time())

    callbacks = [
        # Stop once val_loss has not improved for 18 consecutive epochs.
        EarlyStopping(monitor='val_loss', patience=18),
        # Keep only the best model (by val_loss) on disk. The deprecated
        # `period=1` argument is dropped: saving every epoch is the default.
        ModelCheckpoint(checkpoint_path, monitor='val_loss',
                        verbose=0, save_best_only=True,
                        save_weights_only=False, mode='auto'),
        # The deprecated `write_grads=False` argument is dropped: it was
        # already the default.
        TensorBoard(log_dir=tensorboard_dir, histogram_freq=200,
                    write_images=True),
    ]
    return model, callbacks

In [None]:
# --- Load the preprocessed dataset ------------------------------------------
# Open the store read-only and close it as soon as the 'nominal' frame is in
# memory. The previous `pd.HDFStore(..., mode="r+")[...]` opened the file
# writable and never closed the handle.
with pd.HDFStore('/localdata/Athar/datasets/preprocessed_dataset.h5', mode='r') as store:
    data = store['nominal']  # open dataframe
print("Should be here")
print(data.columns)

# Columns fed to the network as input features.
training_variables = ['pxB1', 'pyB1', 'pzB1', 'eB1', 'pxB2', 'pyB2', 'pzB2', 'eB2', 'pxJ1', 'pyJ1', 'pzJ1', 'eJ1',
              'pxJ2', 'pyJ2', 'pzJ2', 'eJ2', 'pxL1', 'pyL1', 'pzL1', 'eL1', 'pxN1', 'pyN1', 'pzN1', 'eN1',
              'pxH', 'pyH', 'pzH', 'eH', 'pxt11', 'pyt11', 'pzt11', 'et11', 'pxt12', 'pyt12', 'pzt12',
              'et12', 'pxt21', 'pyt21', 'pzt21', 'et21', 'pxt22', 'pyt22', 'pzt22', 'et22', 'pxW1', 'pyW1',
              'pzW1', 'eW1', 'pxW2', 'pyW2', 'pzW2', 'eW2', 'm_B1', 'pt_B1', 'eta_B1', 'phi_B1', 'm_B2',
              'pt_B2', 'eta_B2', 'phi_B2', 'm_J1', 'pt_J1', 'eta_J1', 'phi_J1', 'm_J2', 'pt_J2', 'eta_J2',
              'phi_J2', 'm_L1', 'pt_L1', 'eta_L1', 'phi_L1', 'm_N1', 'pt_N1', 'eta_N1', 'phi_N1', 'm_H',
              'pt_H', 'eta_H', 'phi_H', 'm_t11', 'pt_t11', 'eta_t11', 'phi_t11', 'm_t12', 'pt_t12',
              'eta_t12', 'phi_t12', 'm_t21', 'pt_t21', 'eta_t21', 'phi_t21', 'm_t22', 'pt_t22', 'eta_t22',
              'phi_t22', 'm_W1', 'pt_W1', 'eta_W1', 'phi_W1', 'm_W2', 'pt_W2', 'eta_W2', 'phi_W2'
             ]

# --- Build the model --------------------------------------------------------
print("BUILD_MODEL")
nvars = len(training_variables)  # one network input per training variable
name = 'model_name'              # used in the checkpoint and plot file names
model, callbacks = build_model(nvars, name)

# --- Select signal and background events ------------------------------------
# The 'signal' column is the label: 1.0 marks signal, 0.0 marks background.
# Rows carrying any other value are dropped by these two selections.
sig_df = data[(data['signal'] == 1.0)]
bkg_df = data[(data['signal'] == 0.0)]
print('No. Signal Events:     {}'.format(len(sig_df)))
print('No. Background Events: {}'.format(len(bkg_df)))

# Size of the larger class. Not used in the visible part of this notebook;
# kept because later cells may rely on it.
scaleto = max(len(sig_df), len(bkg_df))

selected_events = pd.concat([sig_df, bkg_df])
print("len(sig_df) = ", len(sig_df))
print("len(bkg_df) = ", len(bkg_df))

# Keep only the input features plus the label column.
training_dataframe = selected_events[training_variables + ['signal']]
# --- Train/test split ---------------------------------------------------------
# Hold out 5% of the selected events (test_size=0.05) as a test set. The fixed
# random_state makes the split reproducible between runs. Features and labels
# are passed as plain NumPy arrays (.values).
print ("TRAIN_TEST_SPLIT")
training_data, testing_data, training_labels, testing_labels= train_test_split(
    training_dataframe[training_variables].values
    ,training_dataframe['signal'].values,test_size=0.05, random_state=7)
print ("WILL PLOT")

# --- Training -----------------------------------------------------------------
# Train for up to 500 epochs; the callbacks returned by build_model include
# EarlyStopping on val_loss, so training may end earlier. validation_split=0.25
# asks Keras to hold out a quarter of the training arrays for the val_* metrics.
print ("START TRAINING")
history = model.fit(training_data, training_labels, shuffle=True,
              epochs=500, batch_size=4096, verbose=True,
              callbacks=callbacks, validation_split=0.25)

# Write a diagram of the network architecture to disk.
plot_model(model, to_file='/localdata/Athar/plots/{}.png'.format(name)) 

# --- Diagnostic plots -----------------------------------------------------------
# ROC_curve and trainingPlots come from the local `visualize` module.
# NOTE(review): presumably each call saves a figure under the given name --
# confirm against visualize.py.
# The in-memory model after fit() is used here, not the checkpoint file.
print ("PLOT ROC_curve")
ROC_curve(training_data, training_labels, model, 'ROC_training_{}'.format(name), 'red')
print ("PLOT ROC_curve")
ROC_curve(testing_data, testing_labels, model, 'ROC_testing_{}'.format(name), 'cyan')
print ("PLOT training")
trainingPlots(history, 'trainingPlot_{}'.format(name))
# --- Discriminant plot ----------------------------------------------------------
print("PLOTTING...")

# Separate signal (label 1) from background (label 0) with boolean masks
# rather than a Python loop over every event. Each result is a fresh 2-D array
# that keeps the feature dimension even when a class is empty; the previous
# list-append version collapsed an empty class to shape (0,).
test_sig = testing_data[testing_labels == 1]
test_bkg = testing_data[testing_labels == 0]
train_sig = training_data[training_labels == 1]
train_bkg = training_data[training_labels == 0]

print("test_signal: ", np.shape(test_sig))
print("train_signal:", np.shape(train_sig))

# discPlot comes from the local `visualize` module.
# NOTE(review): presumably it plots the network output for the four samples
# under the given name -- confirm against visualize.py.
print("discPlot")
discPlot('NN_disc_{}'.format(name), model, train_sig,
         train_bkg, test_sig, test_bkg)