In [171]:
import ROOT
import math
# Colors
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import AutoMinorLocator, MultipleLocator
from root_numpy import root2array, tree2array
from root_numpy import testdata, fill_hist
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (recall_score,  precision_score, f1_score, roc_auc_score,
                             make_scorer, confusion_matrix, accuracy_score)

## Import ROOT file

You can either import a file which includes both Xtohh1000 and Xtohh2000 signal events, but their samples must be different or import just the file which includes one of the signals and the background.

In [2]:
rfile = ROOT.TFile("/home/andrea/Escritorio/CERN data/Try3/all_1000.root")
intree = rfile.Get("Nominal")
array = tree2array(intree)
df = pd.DataFrame(array)
df = df[df['sample'] != 'data']
df.columns.values

array(['sample', 'EventWeight', 'EventNumber', 'm_region',
       'm_FJNbtagJets', 'm_FJpt', 'm_FJeta', 'm_FJphi', 'm_FJm', 'm_DTpt',
       'm_DTeta', 'm_DTphi', 'm_DTm', 'm_dPhiFTwDT', 'm_dRFJwDT',
       'm_dPhiDTwMET', 'm_MET', 'm_hhm', 'm_bbttpt'], dtype=object)

## Identifying signal and background

If the row sample is 'Xtohh1000' or 'Xtohh2000', then the new columns will have a 1 in this row.

In [3]:
def classifier(row):
    if row['sample'] == 'Xtohh1000':
        return 1
    #else if row['sample'] == 'Xtohh2000':
    #    return 1
    else:
        return 0

In [4]:
# Select rows different of data samples
# Select rows on the SR_1tag region
df = df[(df['sample']!='data') & (df['m_region']=='SR_1tag')]
df["signal"] = df.apply(classifier, axis=1)

# Delete columns
not_cons = ['sample', 'EventWeight', 'EventNumber', 'm_region', 'm_FJNbtagJets', 'm_FJphi', 'm_FJeta', 'm_DTeta', 'm_DTphi']
df.drop(not_cons, axis=1, inplace=True)
df

Unnamed: 0,m_FJpt,m_FJm,m_DTpt,m_DTm,m_dPhiFTwDT,m_dRFJwDT,m_dPhiDTwMET,m_MET,m_hhm,m_bbttpt,signal
157,371.481293,10326.322266,514.951416,165099.500000,3.040748,3.441318,-0.236810,30.293037,1193.056519,150.091354,0
159,671.963440,6808.773926,648.846802,165977.250000,3.086895,3.087288,0.184433,25.921062,1341.615479,42.877872,0
349,468.992157,16738.765625,436.591125,85872.664062,3.135852,3.295279,0.094527,52.840870,1029.941772,32.505005,0
383,460.490295,25541.880859,344.878723,131553.406250,2.889410,2.926831,-0.652137,32.617325,841.982971,153.011383,0
385,549.088257,45904.292969,506.746826,95809.492188,3.103930,3.194793,0.390848,65.572906,1143.339966,46.770031,0
419,567.861572,32648.080078,552.612671,139901.515625,3.104266,3.111501,-0.754485,29.792727,1143.394165,25.878639,0
503,528.577332,15937.913086,437.923706,185447.890625,3.086575,3.224717,0.313445,60.506413,1121.502808,94.438171,0
619,494.375610,23339.521484,448.691528,147805.500000,3.131119,3.245680,0.535443,37.519505,1063.593628,45.949661,0
661,478.946838,140186.984375,583.283447,169675.593750,2.974471,3.631285,0.107598,10.215162,1710.350708,136.639877,0
665,526.373291,165002.656250,379.679871,152255.546875,2.933894,3.215542,-0.050245,61.182739,1138.267212,173.520752,0


## Dividing data for training and testing

In [132]:
# Features
feature_cols = df.columns.values[:-1]

X = df.loc[:, feature_cols].values
# Targets
y = df['signal'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

X_train.size 

102520

## Configure neural network

In [172]:
classifier = Sequential()
#First Hidden Layer
classifier.add(Dense(15, activation='relu', kernel_initializer='uniform', input_dim=10))
#Second  Hidden Layer
classifier.add(Dense(15, activation='relu', kernel_initializer='uniform'))
#Third  Hidden Layer
classifier.add(Dense(15, activation='relu', kernel_initializer='uniform'))
#Output Layer
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='uniform'))

In [173]:
#Compiling the neural network
classifier.compile(optimizer ='adam',loss='binary_crossentropy', metrics =['accuracy'])

In [189]:
#Fitting the data to the training dataset
classifier.fit(X_train,y_train, batch_size=50, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f394e3b0550>

In [190]:
eval_model=classifier.evaluate(X_train, y_train)
eval_model



[0.44606970313397226, 0.789992196667813]

In [191]:
y_pred=classifier.predict(X_test)
y_pred =(y_pred>0.5)

In [192]:
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[1623  215]
 [ 343  382]]


In [193]:
r = recall_score(y_test,y_pred)
p = precision_score(y_test,y_pred)
a = accuracy_score(y_test,y_pred)

print("recall:", r)
print("precision:", p)
print("accuracy:", a)

recall: 0.526896551724138
precision: 0.6398659966499163
accuracy: 0.7822863831447523


In [203]:
y_test[:20]

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0])

In [202]:
y_pred[:20]

array([[False],
       [False],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False]])