In [None]:
import sys
import ROOT
import numpy as np
import pandas as pd
import root_pandas
import seaborn as sb
import matplotlib.pyplot as plt
import uproot
import time

from itertools import product

#from root_numpy import root2array

from keras.models import Sequential
from keras.layers import Dense

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score, auc

from bayes_opt import BayesianOptimization
from sklearn.preprocessing import QuantileTransformer
import pickle

In [None]:
#########################################
### PREPARE DFs FOR TRAINING AND TEST ###
#########################################


# Input variables of the NN; the order of this list is the column order of X.
features = [
    # energy of the unpaired muon in the rest frame of the jpsi di-muon system
    'muE_muSysRf',
    # difference between the pT of the two jpsi muons
    'pT_var',
    # mu_sys.Mag() - jpsi_mass
    'jpsi_mass_diffReco',
    # deltaR between the two muons that reconstruct the jpsi
    'deltaR_mu1mu2',
    # pt / eta / phi of mu, mu1 and mu2
    'mu_pt', 'mu_eta', 'mu_phi',
    'mu1_pt', 'mu1_eta', 'mu1_phi',
    'mu2_pt', 'mu2_eta', 'mu2_phi',
]

# Samples before the cut (used for training/testing) ...
mu_events, tau_events, bkg_events = map(
    pd.read_csv,
    ('mu_events_NOCUT.csv', 'tau_events_NOCUT.csv', 'bkg_events_NOCUT.csv'),
)
# ... and samples after the cut (the ones the NN will finally be evaluated on).
mu_CUT, tau_CUT, bkg_CUT = map(
    pd.read_csv,
    ('mu_events_CUT.csv', 'tau_events_CUT.csv', 'bkg_events_CUT.csv'),
)

# Remove from the training pool the events that survive the cut: they are few,
# and the NN must not be trained on the events it is later evaluated on.
# NOTE(review): the drop is label based, i.e. it assumes that 'tree_index'
# matches the row labels of the NOCUT frames -- confirm against the CSV files.
mu_events = mu_events.drop(index=mu_CUT['tree_index'])
tau_events = tau_events.drop(index=tau_CUT['tree_index'])
bkg_events = bkg_events.drop(index=bkg_CUT['tree_index'])

# Stack the three samples into a single dataset.
dataset = pd.concat((mu_events, tau_events, bkg_events), sort=False)

# Shuffled, reproducible split.
# NOTE(review): test_size=0.85 leaves only 15% of the events for training -- confirm this is intended.
train, test = train_test_split(dataset, shuffle=True, random_state=1986, test_size=0.85)

# Inputs (X) and labels (Y) of the training sample.
X = pd.DataFrame(data=train, columns=features)
Y = pd.DataFrame(data=train, columns=['target_bkgNN'])

In [None]:
# Quick look at the background sample left after removing the CUT events.
# Parenthesised form: same output on Python 2, valid syntax on Python 3.
print(bkg_events)

In [None]:
##################
### PREPROCESS ###
##################


# Fit a quantile transformation with a normal output distribution on the
# training inputs, then apply it to them.
qt = QuantileTransformer(output_distribution='normal', random_state=1986)
qt.fit(X[features])
transformedX = qt.transform(X[features])

# Persist the fitted transformer so that the very same mapping can be applied
# to other samples later. Pickle streams are binary data, so the file is opened
# in 'wb' mode, and the `with` block guarantees it is flushed and closed before
# anything tries to read it back.
with open('quantile_tranformation_bkgNN.pck', 'wb') as qt_file:
    pickle.dump(qt, qt_file)

In [None]:
####################################################
### FUNCTION FOR BAYESIAN OPTIMIZATION OF THE NN ###
####################################################


def BO_function(features,transformedX,Y,test,pbounds,init_points,n_iter):
    """Run a Bayesian optimization of the NN hyper-parameters and print the best point.

    For every point proposed by the optimizer a fully connected network is
    built, trained on (transformedX, Y) and scored with
    log((AUC * Gini * accuracy)**4), where AUC and Gini are computed on the
    test sample and accuracy is the metric returned by model.evaluate on the
    training inputs.

    Parameters
    ----------
    features : list of str
        Names of the input columns; its length is the NN input dimension.
    transformedX : array-like
        Already transformed training inputs given to model.fit / model.evaluate.
    Y : array-like
        Training labels.
    test : pandas.DataFrame
        Test sample; must provide the `features` columns and 'target_bkgNN'.
        It is only read, never modified.
    pbounds : dict
        Search bounds handed to BayesianOptimization. The keys are the keyword
        arguments of the objective: 'n_layers', 'units_perlayer', 'batch_size'.
    init_points, n_iter : int
        Forwarded unchanged to optimizer.maximize.

    Returns
    -------
    None. The best point (optimizer.max) and the running time are printed.
    """
    start = time.time()

    # Smallest figure of merit passed to the logarithm. A network that predicts
    # a constant has AUC == 0.5, hence Gini == 0 and log(0) == -inf, which is
    # not a usable target for the optimizer; such points get log(1e-12) instead.
    min_merit = 1e-12

    # The test inputs and labels are the same for every point of the scan, so
    # the stored quantile transformation is loaded and applied only once.
    # NOTE: pickle.load can execute arbitrary code; only load a file that was
    # produced by this notebook.
    with open('quantile_tranformation_bkgNN.pck', 'rb') as qt_file:
        qt = pickle.load(qt_file)
    x = pd.DataFrame(test, columns=features)
    transformedx = qt.transform(x[features])
    test_labels = test['target_bkgNN']

    def NN_function(n_layers,units_perlayer,batch_size):
        """Objective: build, train and score one NN for the proposed hyper-parameters."""

        #optimizer_fcts = ['nadam', 'adamax', 'adam', 'adadelta', 'adagrad', 'rmsprop', 'sgd']
        #activation_fcts = ['softmax', 'elu', 'selu', 'relu', 'softplus', 'softsign', 'tanh', 'sigmoid', 
        #                   'hard_sigmoid', 'exponential']
        #metrics_fcts = ['binary_accuracy', 'categorical_accuracy', 'sparse_categorical_accuracy', 
        #                'top_k_categorical_accuracy', 'sparse_top_k_categorical_accuracy']
        #loss_fcts = ['mean_squared_error', 'mean_absolute_percentage_error', 'mean_squared_logarithmic_error',
        #            'squared_hinge', 'hinge', 'categorical_hinge', 'logcosh', 'categorical_crossentropy',
        #            'sparse_categorical_crossentropy', 'binary_crossentropy', 'kullback_leibler_divergence',
        #            'poisson', 'cosine_proximity']

        # The optimizer proposes floats. units_perlayer is rounded down to the
        # closest even integer (same result as the former if/else for the
        # positive values allowed by the bounds).
        units_perlayer = 2 * int(units_perlayer // 2)

        # define the model: int(n_layers) hidden ReLU layers and a sigmoid output.
        # Only the first layer declares the input dimension.
        model = Sequential()
        for layer_idx in range(int(n_layers)):
            if layer_idx == 0:
                model.add(Dense(units_perlayer, input_dim=len(features),activation='relu'))
            else:
                model.add(Dense(units_perlayer, activation='relu'))
        model.add(Dense(1,activation='sigmoid'))

        # compile the model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        # fit the model
        model.fit(transformedX, Y, epochs=10, batch_size=int(batch_size), validation_split=0.5,verbose=0)

        # evaluate the model on the training inputs; scores[1] is the accuracy metric
        scores = model.evaluate(transformedX, Y,verbose=0)

        # predictions on the test sample. They are used directly instead of
        # being inserted as a new column into `test`, so the caller's DataFrame
        # no longer grows by one randomly named column per evaluation.
        y = model.predict(transformedx)

        # let sklearn do the heavy lifting and compute the ROC curve
        # (ravel: model.predict returns one column per output node)
        fpr, tpr, _ = roc_curve(test_labels, np.asarray(y).ravel())

        # compute the auc
        auroc = auc(fpr, tpr)

        # compute Gini index
        gini_index = (auroc-0.5)*2

        merit = (auroc*gini_index*scores[1])**4
        return np.log(max(merit, min_merit))

    optimizer = BayesianOptimization(
        f = NN_function,
        pbounds = pbounds,
    )
    
    # optimize
    # NOTE(review): `alpha` is passed through exactly as before; presumably it
    # is forwarded to the underlying Gaussian process -- confirm against the
    # installed bayes_opt version.
    optimizer.maximize(
        init_points=init_points,
        n_iter=n_iter,
        alpha = 1e-3
    )
    
    # Parenthesised prints: same output on Python 2, valid syntax on Python 3.
    print(optimizer.max)
    
    end = time.time()
    print('Running time of the Bayesian Optimization = %.1f'%(end - start))

In [None]:
#########################################
### OPTIMIZE THE PARAMETERS OF THE NN ###
#########################################


# (lower, upper) limits of the region scanned for each hyper-parameter
pbounds = {
    'n_layers': (4, 20),
    'units_perlayer': (16, 512),
    'batch_size': (5, 2000),
}
# not scanned at the moment: 'optimizer': (0,7), 'activation': (0,10), 'loss': (0,13)

# build the optimizer and run the scan; init_points and n_iter are forwarded
# to optimizer.maximize inside BO_function
BO_function(features, transformedX, Y, test, pbounds, init_points=20, n_iter=30)