# Signal vs. background classification in the double-escape peak

In this notebook we read in the prepared data, construct and train the DNN, and then evaluate its performance.

In [1]:
%matplotlib inline

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import matplotlib.pyplot as plt
import numpy  as np
import random as rd
import tables as tb
import h5py
import tensorflow as tf

from matplotlib.patches         import Ellipse
from __future__  import print_function

# Keras imports
import keras.backend.tensorflow_backend as K
from keras.models               import Model, load_model
from keras.layers               import Input, Dense, MaxPooling3D, AveragePooling3D, Conv3D, Conv2D, AveragePooling2D, Activation, Dropout, merge
from keras.layers.normalization import BatchNormalization
from keras.optimizers           import SGD, Adam, Nadam         
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core          import Flatten
from keras                      import callbacks
from keras.regularizers         import l2, l1
from keras.initializers         import RandomNormal
from keras.utils.layer_utils    import print_summary
from keras                      import regularizers

  return f(*args, **kwds)
Using TensorFlow backend.


# Variable definitions
Here we define key variables to be used throughout the notebook.  Note that we will read the data from a directory `data_location/run_name`, and it is stored in multiple files:
- The training data will consist of the events stored in files numbered from `train_fstart` up to, but not including, `train_fend`
- The test data will consist of the events stored in files numbered from `test_fstart` up to, but not including, `test_fend`

In [2]:
# data dimensions: size of one event map along x, y and z
# (read_data / read_real_data reshape every event to (xdim, ydim, zdim, 1))
xdim = 48
ydim = 48
zdim = 48

# data location and training/test file numbers
# (read_data iterates over range(f_start, f_end), so each *_fend is exclusive)
data_location = "/data/fastmc/descape/classification"
run_name = "data"
train_fstart = 0
train_fend = 45
test_fstart = 45
test_fend = 50

# maximum number of events that read_data keeps per class (applied to signal
# and to background separately); a value <= 0 disables the cap
evt_limit = 15000

## Function definitions

### Data input functions

In [3]:
# define the functions to read the simulated data from multiple files
def _read_event_maps(loc, rname, tag, f_start, f_end, limit):
    """Read and stack the `maps` node of one family of event files.

    Parameters
    ----------
    loc, rname : str
        Files are looked up in the directory `loc/rname`.
    tag : str
        Event-type tag embedded in the file name ("EPEM" or "SE").
    f_start, f_end : int
        File numbers to read: range(f_start, f_end), i.e. f_end is exclusive.
    limit : int
        If > 0, keep at most this many events (the first ones read).

    Returns
    -------
    numpy.ndarray
        All event maps concatenated along the first axis.

    Raises
    ------
    ValueError
        If the file range is empty.
    """
    if f_end <= f_start:
        raise ValueError("empty file range: f_start={0}, f_end={1}".format(f_start, f_end))

    chunks = []
    for fn in range(f_start, f_end):
        if fn == f_start:
            print("-- Reading file {0},".format(fn), end=' ')
        else:
            print("{0},".format(fn), end=' ')
        fname = "{0}/{1}/vox_dnn_Xe_{2}_7bar_descape_2x2x2_out_{3}.h5".format(loc, rname, tag, fn)
        # the context manager closes the file even if reading the node fails
        with tb.open_file(fname, 'r') as h5f:
            chunks.append(np.array(h5f.root.maps))
    print("done.")

    # concatenate once instead of re-copying the growing array for every file
    maps = np.concatenate(chunks)
    if limit > 0:
        maps = maps[0:limit]
    return maps


def read_data(loc, rname, f_start, f_end, fval):
    """Reads all events from the files with the specified file numbers.

    Signal events come from the "EPEM" files and are labelled 1; background
    events come from the "SE" files and are labelled 0.  Each class is capped
    at the notebook-level `evt_limit` (when it is > 0).  No normalisation is
    applied to the maps.

    Parameters
    ----------
    loc, rname : str
        Files are read from the directory `loc/rname`.
    f_start, f_end : int
        File numbers to read: range(f_start, f_end), i.e. f_end is exclusive.
    fval : float
        Sets the validation size: nval = int(fval * (n_signal + n_background)).
        The last nval events of EACH class are held back, so the validation
        set contains 2*nval events.

    Returns
    -------
    (x, y) when nval == 0, otherwise (x, y, xval, yval).
        x arrays have shape (n, xdim, ydim, zdim, 1); y arrays have shape (n, 1).
        Within each set all signal events precede all background events.
    """

    def _as_network_input(maps):
        # add the trailing channel axis used by the network input layer
        return np.reshape(maps, (len(maps), xdim, ydim, zdim, 1))

    def _labels(n_signal, n_background):
        return np.concatenate([np.ones([n_signal, 1]), np.zeros([n_background, 1])])

    # read in the signal events.
    print("Reading signal events...")
    s_array = _read_event_maps(loc, rname, "EPEM", f_start, f_end, evt_limit)

    # read in the background events.
    print("\nReading background events...")
    b_array = _read_event_maps(loc, rname, "SE", f_start, f_end, evt_limit)
    print("\nRead {0} signal events and {1} background events.".format(len(s_array),len(b_array)))

    # concatenate the datasets, splitting into training and validation sets
    print("Concatenating datasets...")
    nval = int(fval * (len(s_array) + len(b_array)))

    if nval == 0:
        x_ = np.concatenate([s_array, b_array])
        y_ = _labels(len(s_array), len(b_array))

        # reshape for training with TensorFlow
        print("Reshaping projection...")
        x_ = _as_network_input(x_)
        print("Finished reading data: {0} training/test events".format(len(x_)))
        return x_,y_

    # hold back the last nval events of each class for validation
    s_train, s_val = s_array[0:-nval], s_array[-nval:]
    b_train, b_val = b_array[0:-nval], b_array[-nval:]

    x_ = np.concatenate([s_train, b_train])
    y_ = _labels(len(s_train), len(b_train))
    xval_ = np.concatenate([s_val, b_val])
    yval_ = _labels(len(s_val), len(b_val))

    # reshape for training with TensorFlow
    print("Reshaping projection...")
    x_ = _as_network_input(x_)
    xval_ = _as_network_input(xval_)
    print("Finished reading data: {0} training/test and {1} validation events".format(len(x_),len(xval_)))

    return x_,y_,xval_,yval_

In [4]:
# define the function to read real (detector) data from a single file
def read_real_data(fname):
    """Reads data from the specified file.

    Parameters
    ----------
    fname : str
        Path of an HDF5 file containing the nodes `maps` and `evtnum`.

    Returns
    -------
    x_ : numpy.ndarray
        Event maps reshaped to (n_events, xdim, ydim, zdim, 1).  No
        normalisation is applied.
    f_evts : numpy.ndarray
        Contents of the `evtnum` node, as stored in the file.
    """

    # read in the event maps and their event numbers.
    print("Reading real data...")
    # copy both nodes into memory, then let the context manager close the file
    with tb.open_file(fname, 'r') as f_dat:
        f_array = np.array(f_dat.root.maps)
        f_evts = np.array(f_dat.root.evtnum)
    print("done.")

    # reshape for evaluation with TensorFlow
    print("Reshaping...")
    x_ = np.reshape(f_array, (len(f_array), xdim, ydim, zdim, 1))

    print("Finished reading data: {0} events".format(len(x_)))
    return x_,f_evts

### Neural network models
These functions should define and return a Keras model object.

In [5]:
##################################
# Define more neural networks here

def model_3D(inputs):
    """Build and compile the 3D convolutional classifier.

    Five Conv3D -> MaxPooling3D -> BatchNormalization stages are followed by
    a 512-unit dense layer with dropout and a single sigmoid output unit.

    Parameters
    ----------
    inputs : Keras input tensor fed to the first Conv3D layer.

    Returns
    -------
    A Keras Model compiled with binary cross-entropy loss, the Nadam
    optimizer and the accuracy metric.
    """
    weight_decay = 0.000001

    # one row per stage:
    # (conv filters, conv kernel edge, batch-norm epsilon, batch-norm axis, batch-norm momentum)
    stages = [
        ( 64, 3, 1e-10, -1, 0.2),
        (128, 3, 1e-05,  4, 0.8),
        (256, 2, 1e-05,  4, 0.99),
        (256, 2, 1e-05,  4, 0.99),
        (256, 2, 1e-05,  4, 0.99),
    ]

    net = inputs
    for n_filters, edge, bn_epsilon, bn_axis, bn_momentum in stages:
        net = Conv3D(n_filters, (edge, edge, edge), padding='valid', strides=(1, 1, 1),
                     activation='relu', kernel_initializer='glorot_normal',
                     kernel_regularizer=l2(weight_decay))(net)
        net = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='same',
                           data_format=None)(net)
        net = BatchNormalization(epsilon=bn_epsilon, axis=bn_axis, momentum=bn_momentum,
                                 weights=None, gamma_regularizer=None, beta_regularizer=None,
                                 beta_initializer="zero", gamma_initializer="one")(net)

    # classification head
    head = Flatten()(net)
    head = Dense(units=512, activation='relu', kernel_initializer='glorot_normal',
                 kernel_regularizer=l2(weight_decay))(head)
    head = Dropout(.7)(head)
    inc_output = Dense(units=1, activation='sigmoid', kernel_initializer='normal',
                       kernel_regularizer=l2(weight_decay))(head)

    model = Model(inputs, inc_output)
    optimizer = Nadam(lr=0.000002, beta_1=0.9, beta_2=0.999,
                      epsilon=1e-08, schedule_decay=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [6]:
def model_3D_old(inputs):
    """Build and compile the earlier 3D convolutional classifier.

    Layer order: Conv3D(512) -> MaxPooling3D -> BatchNormalization ->
    Conv3D(256) -> Conv3D(128) -> BatchNormalization -> Conv3D(256) ->
    MaxPooling3D -> Flatten -> Dense(128) -> Dropout -> Dense(1, sigmoid).

    Parameters
    ----------
    inputs : Keras input tensor fed to the first Conv3D layer.

    Returns
    -------
    A Keras Model compiled with binary cross-entropy loss, the Nadam
    optimizer and the accuracy metric.
    """

    def conv(n_filters, edge, padding, stride):
        # all convolutions share activation, initializer and L2 strength
        return Conv3D(n_filters, (edge, edge, edge), padding=padding,
                      strides=(stride, stride, stride), activation='relu',
                      kernel_initializer='lecun_uniform',
                      kernel_regularizer=l2(0.000001))

    def max_pool(size):
        return MaxPooling3D(pool_size=(size, size, size), strides=(size, size, size),
                            padding='same', data_format=None)

    def batch_norm():
        return BatchNormalization(epsilon=1e-05, axis=4, momentum=0.99, weights=None,
                                  gamma_regularizer=None, beta_regularizer=None,
                                  beta_initializer="zero", gamma_initializer="one")

    net = conv(512, 10, 'valid', 2)(inputs)
    net = max_pool(5)(net)
    net = batch_norm()(net)
    net = conv(256, 1, 'same', 1)(net)
    net = conv(128, 2, 'same', 2)(net)
    net = batch_norm()(net)
    net = conv(256, 2, 'same', 2)(net)
    net = max_pool(2)(net)

    # classification head
    head = Flatten()(net)
    head = Dense(units=128, activation='relu', kernel_initializer='lecun_uniform',
                 kernel_regularizer=l2(0.000001))(head)
    head = Dropout(.3)(head)
    inc_output = Dense(units=1, activation='sigmoid', kernel_initializer='normal',
                       kernel_regularizer=l2(0.000001))(head)

    model = Model(inputs, inc_output)
    optimizer = Nadam(lr=0.00001, beta_1=0.9, beta_2=0.999,
                      epsilon=1e-08, schedule_decay=0.01)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [7]:
def model_CNN(inputs):
    """Build and compile a 2D convolutional classifier.

    Three Conv2D stages with average pooling and batch normalisation are
    followed by a 16-unit dense layer with dropout and a single sigmoid
    output unit.  No kernel regularisation is applied.

    Parameters
    ----------
    inputs : Keras input tensor fed to the first Conv2D layer.

    Returns
    -------
    A Keras Model compiled with binary cross-entropy loss, the Nadam
    optimizer and the accuracy metric.
    """

    def conv(n_filters, edge, stride):
        return Conv2D(n_filters, (edge, edge), padding='same', strides=(stride, stride),
                      activation='relu', kernel_initializer='he_normal')

    def avg_pool():
        return AveragePooling2D(pool_size=(2, 2), data_format=None, padding="same",
                                strides=(2, 2))

    def batch_norm():
        return BatchNormalization(epsilon=1e-05, axis=3, momentum=0.99, weights=None,
                                  beta_initializer='zero', gamma_initializer='one',
                                  gamma_regularizer=None, beta_regularizer=None)

    # stage 1 pools before normalising ...
    net = conv(32, 5, 2)(inputs)
    net = avg_pool()(net)
    net = batch_norm()(net)
    # ... stages 2 and 3 normalise before pooling
    for n_filters, edge in ((64, 3), (128, 2)):
        net = conv(n_filters, edge, 1)(net)
        net = batch_norm()(net)
        net = avg_pool()(net)

    # classification head
    head = Flatten()(net)
    head = Dense(units=16, activation='relu', kernel_initializer='he_normal')(head)
    head = Dropout(.4)(head)
    inc_output = Dense(units=1, activation='sigmoid', kernel_initializer='lecun_normal')(head)

    model = Model(inputs, inc_output)
    optimizer = Nadam(lr=0.001, beta_1=0.9, beta_2=0.999,
                      epsilon=1e-08, schedule_decay=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [8]:
#testmodel
def model_test(inputs):
    """Build and compile a small fully-connected test model.

    The input is flattened and passed through one 128-unit ReLU layer
    (L2 kernel and L1 activity regularisation), dropout, and a single
    sigmoid output unit.

    Parameters
    ----------
    inputs : Keras input tensor; used both as the network input and as the
        input of the returned Model.

    Returns
    -------
    A Keras Model compiled with binary cross-entropy loss, the Nadam
    optimizer and the accuracy metric.
    """
    # build on the argument itself, not on whatever global `inputs` exists
    f1 = Flatten()(inputs)
    f1 = Dense(units=128, kernel_initializer="normal", activation="relu", kernel_regularizer=l2(0.00001), activity_regularizer=l1(0.00001))(f1)
    f1 = Dropout(.7)(f1)
    inc_output = Dense(units=1, kernel_initializer="normal", activation="sigmoid")(f1)
    model = Model(inputs, inc_output)

    model.compile(loss='binary_crossentropy',
                  optimizer=Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004), metrics=['accuracy'])
    return model

In [9]:
# define a fully-connected neural network with three 64-neuron hidden layers and 1 readout neuron
def model_FC(inputs):
    """Build and compile a fully-connected classifier.

    The input is flattened and passed through three 64-unit sigmoid layers,
    dropout, and a single sigmoid output unit.

    Parameters
    ----------
    inputs : Keras input tensor.

    Returns
    -------
    A Keras Model compiled with binary cross-entropy loss, the Nadam
    optimizer and the accuracy metric.
    """
    n_hidden_layers = 3

    net = Flatten()(inputs)
    for _ in range(n_hidden_layers):
        net = Dense(units=64, kernel_initializer="normal", activation="sigmoid")(net)
    net = Dropout(.3)(net)
    inc_output = Dense(units=1, kernel_initializer="normal", activation="sigmoid")(net)

    model = Model(inputs, inc_output)
    optimizer = Nadam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                      epsilon=1e-08, schedule_decay=0.01)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [10]:
# plot a 2D map (e.g. one slice of an event) as a grid of dots
# -- carried over from NEW_kr_diff_mc_train.ipynb
def NEW_SiPM_map_plot(xarr, normalize=True):
    """
    Plots a 2D map as a grid of black dots whose opacity encodes the value.

    Element [i, j] is drawn at (x, y) = (10*i + 5, 10*j + 5) on axes fixed to
    the range 0-500 in both directions.

    xarr:      2D array of values.
    normalize: if True, shift and scale the values to the range [0, 1]
               (a constant map becomes all zeros, i.e. fully transparent).
               If False, the values are used directly as opacities and must
               already lie in [0, 1].

    Raises ValueError if xarr is not 2-dimensional.
    """
    # work on a float copy: integer input would break the division below and
    # the caller's array must not be modified
    probs = np.array(xarr, dtype=float)
    if probs.ndim != 2:
        raise ValueError("expected a 2D map, got an array with {0} dimension(s)".format(probs.ndim))

    if normalize:
        probs -= np.min(probs)
        peak = np.max(probs)
        # a flat map has peak == 0; dividing would produce NaN opacities
        if peak > 0:
            probs /= peak

    # set up the figure
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    fig.set_figheight(10.0)
    fig.set_figwidth(10.0)
    ax1.axis([0, 500, 0, 500])

    # iterate over the array's own extent rather than the notebook globals,
    # so maps of any size can be drawn
    n_x, n_y = probs.shape
    for i in range(n_x):
        for j in range(n_y):
            r = Ellipse(xy=(i * 10 + 5, j * 10 + 5), width=5., height=5.)
            r.set_facecolor('0')
            r.set_alpha(float(probs[i, j]))
            ax1.add_artist(r)

    plt.xlabel("x (mm)")
    plt.ylabel("y (mm)")

## Function to fix image writing in Tensorboard

In [11]:
def tboard_fix(tbrd, model):
        """Register TensorFlow histogram and image summaries for a model.

        For every weight of every layer a histogram summary is created, and,
        depending on the rank of the squeezed weight, one or more image
        summaries.  A histogram summary of each layer's output is added as
        well.  Nothing is returned; the function only creates summary ops
        (and prints the shapes it encounters).

        tbrd  : not referenced anywhere in the body.
        model : object exposing `layers`; each layer exposes `weights`,
                `name` and (optionally) `output`.
        """
        # the session is fetched but not used below
        sess = K.get_session()
        for layer in model.layers:

            for weight in layer.weights:
                tf.summary.histogram(weight.name, weight)
                shape = weight.get_shape()
                # no image for weights of shape (1,)
                if not(len(shape) == 1 and shape[0] == 1):
                    # drop size-1 axes; `shape` now describes the squeezed tensor
                    w_img = tf.squeeze(weight)
                    shape = w_img.get_shape()
                    print("Shape is {0}".format(shape))
                    # NOTE: `shape` is not refreshed after the transpose /
                    # expand_dims calls below, so every following len(shape)
                    # test refers to the squeezed shape computed above.
                    # tf.transpose without `perm` reverses the order of all axes.
                    if len(shape) > 1 and shape[0] > shape[1]:
                        w_img = tf.transpose(w_img)
                    # rank 1: add two leading axes and one trailing axis -> rank 4
                    if len(shape) == 1:
                        w_img = tf.expand_dims(tf.expand_dims(tf.expand_dims(w_img,0), 0), -1)
                    # rank 2: add one leading and one trailing axis -> rank 4
                    if len(shape) == 2:
                        w_img = tf.expand_dims(tf.expand_dims(w_img, 0), -1)
                    # rank 3: move the last axis to the front, add a trailing axis
                    if len(shape) == 3:
                        w_img = tf.transpose(w_img, perm=[2, 0, 1])
                        w_img = tf.expand_dims(w_img, -1)
                    # rank 4: one image summary per index along the last axis
                    if(len(shape) == 4):
                        for ii in range(shape[3]):
                            print("-- Adding image for filter {0}".format(ii))
                            wi_img = tf.transpose(w_img[:,:,:,ii], perm=[2, 0, 1])
                            wi_img = tf.expand_dims(wi_img,-1)
                            tf.summary.image("{0}_{1}".format(weight.name,ii),wi_img)
                    # ranks 1-3: a single image summary; rank >= 5: no image
                    elif(len(shape) < 5):
                        tf.summary.image(weight.name, w_img)

            # histogram of the layer's output tensor, when the layer has one
            if hasattr(layer, 'output'):
                tf.summary.histogram('{}_out'.format(layer.name),
                                     layer.output)

## Load in the data

In [None]:
# read in the training data
x_train, y_train, x_val, y_val = read_data(data_location, run_name, train_fstart, train_fend, 0.05)

Reading signal events...
-- Reading file 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 

In [None]:
print(np.sum(x_train[0]))

In [None]:
# plot one slice of one event
NEW_SiPM_map_plot(x_train[2,:,:,0,0])
print(np.sum(x_train[0,:,:,:,0]))

## Define and train the DNN

In [None]:
# set load_weights to True and point mfile at a saved model file to load a
# previously defined/trained model instead of building a new one
load_weights = False
mfile = 'models/weights-07-0.5093-0.5288.h5'

if(load_weights):
    model = load_model(mfile)
else:
    
    # otherwise define the model
    inputs = Input(shape=(xdim, ydim, zdim, 1))
    model = model_3D(inputs)
    
# define callbacks (actions to be taken after each epoch of training)
# -- checkpoint file names embed the epoch number, training loss and validation
#    loss, e.g. "weights-{epoch:02d}-{loss:.4f}-{val_loss:.4f}.h5"
file_lbl = "{epoch:02d}-{loss:.4f}-{val_loss:.4f}"
filepath="weights-{0}.h5".format(file_lbl)
# -- a checkpoint is written only when val_loss reaches a new minimum
checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='min')
tboard = callbacks.TensorBoard(log_dir='/data/fastmc/descape/logs', histogram_freq=0, write_graph=True, write_images=False)
#tboard_fix(tboard,model)
lcallbacks = [checkpoint, tboard]
model.summary()

#for layer in model.layers:
#    cls = layer.__class__.__name__
#    if(cls == "Conv3D"):
#        print("{0} ({1}) kernel = {2}; filters = {3}; strides = {4}; activation = {5}".format(cls,layer.name,layer.kernel_size,layer.filters,layer.strides,layer.activation))
#    elif(cls == "MaxPooling3D"):
#        print("{0} ({1}) pool = {2}; strides = {3}".format(cls,layer.name,layer.pool_size,layer.strides))
#    elif(cls == "BatchNormalization"):
#        print("{0} ({1})".format(cls,layer.name))
#    elif(cls == "Dense"):
#        print("{0} ({1}) units = {2}; activation = {3}".format(cls,layer.name,layer.units,layer.activation))
#    elif(cls == "Dropout"):
#        print("{0} ({1}) rate = {2}".format(cls,layer.name,layer.rate))
#    else:
#        print("{0} ({1})".format(cls,layer.name))       

In [None]:
x_train.shape

In [16]:
# train the model
hist = model.fit(x_train, y_train, shuffle=True, epochs=100, batch_size=50, verbose=1, validation_data=(x_val,y_val), callbacks=lcallbacks)

Train on 27000 samples, validate on 3000 samples
Epoch 1/100
 1000/27000 [>.............................] - ETA: 2:15 - loss: 0.2174 - acc: 0.9190

KeyboardInterrupt: 

## Compute the predictions for real data

In [None]:
x_real, evt_real = read_real_data("/data/fastmc/descape/classification/data_4735_nocenter_E.h5")

In [None]:
y_real = model.predict(x_real, batch_size=100, verbose=0)

In [None]:
# events with a network output above this value are counted as signal
signal_threshold = 0.205

# count with NumPy so the results are plain integers (the builtin sum() walks
# the (N, 1) prediction array row by row and returns 1-element arrays)
npred_signal = int(np.count_nonzero(y_real > signal_threshold))
npred_background = int(np.count_nonzero(y_real <= signal_threshold))
print("Number of predicted signal events = {}, background events = {}".format(npred_signal,npred_background))

In [None]:
# Print the arrays to a file.
ysave = y_real.reshape(len(y_real))
esave = evt_real.reshape(len(evt_real))
np.savez("classification.npz",evtnum=esave,y=ysave)

In [None]:
print(np.sum(x_real[0]))

# Test the trained model

In [None]:
# read in the test data
x_test, y_test = read_data(data_location, run_name, test_fstart, test_fend, 0.0)

In [None]:
# compute the predictions
loss_and_metrics = model.evaluate(x_test, y_test);
y_pred = model.predict(x_test, batch_size=50, verbose=0)
print(loss_and_metrics)

In [None]:
print(y_pred[0:100])

In [None]:
# create arrays of values for signal vs. background curve
npoints = 200
fname_svsb = "plt/plt_convnet_4M_gabriel_pRelu.h5"

print("-- Calculating points...")
# separate the network outputs by true class (label 1 = signal, 0 = background)
true_labels = np.ravel(y_test)
pred_scores = np.ravel(y_pred)
sig_scores = pred_scores[true_labels == 1]
bkg_scores = pred_scores[true_labels == 0]
if len(sig_scores) == 0 or len(bkg_scores) == 0:
    raise ValueError("the test set must contain at least one signal and one background event")

# npoints thresholds 0, 1/npoints, ..., (npoints-1)/npoints; linspace guarantees
# exactly npoints entries, which the (0, npoints) arrays written below require
thresholds = np.linspace(0., 1., npoints, endpoint=False)

# signal efficiency: fraction of signal events with output >= threshold
si_eff = np.array([np.count_nonzero(sig_scores >= thh) / float(len(sig_scores))
                   for thh in thresholds])
# background rejection: fraction of background events with output < threshold
bg_rej = np.array([np.count_nonzero(bkg_scores < thh) / float(len(bkg_scores))
                   for thh in thresholds])

# save the results to file
print("-- Saving results...")
filters = tb.Filters(complib='blosc', complevel=9, shuffle=False)
# the context manager closes (and flushes) the file even if a write fails
with tb.open_file(fname_svsb, 'w') as f:
    for node_name, values in (('si_eff', si_eff), ('bg_rej', bg_rej)):
        atom = tb.Atom.from_dtype(values.dtype)
        sarr = f.create_earray(f.root, node_name, atom, (0, npoints), filters=filters)
        sarr.append([values])

In [None]:
# plot signal vs. background curves
#fnames = ["plt/reset_V10_iter100.h5", "plt/reset_V10_iter500.h5", "plt/reset_V10_iter1000.h5", "plt/reset_V10_2D.h5", "plt/classification_V10_3Dconv.h5"]
#labels = ["RESET V10, iter100", "RESET V10, iter500", "RESET V10, iter1000", "RESET V10, latest", "SiPMs only (3D), V10"]
#colors = ["green", "black", "brown", "blue", "orange"]
# result files to compare, with the legend label and line colour of each
fnames = ["plt/plt_conv3d_classifier_300317.h5", "plt/plt_convnet_4M_gabriel.h5", "plt/plt_convnet_4M_gabriel_pRelu.h5"]
labels = ["Previous Result", "CONVNET 4M", "CONVNET 4M PRELU"]
colors = ["yellow", "blue", "red"]

# set up the plot
fig = plt.figure()
fig.set_figheight(5.0)
fig.set_figwidth(7.5)

for nm,lb,co in zip(fnames,labels,colors):

    # read in the signal efficiency vs. background rejection information;
    # indexing copies the row into memory, so the file can be closed right away
    # (the context manager also closes it if a node is missing)
    with tb.open_file(nm,'r') as fn:
        eff = fn.root.si_eff[0]
        bgr = fn.root.bg_rej[0]

    plt.plot(eff,bgr,color=co,label=lb,lw=2)

plt.xlabel("signal efficiency")
plt.ylabel("background rejection")
plt.legend(loc=3)

# Compare data and MC

In [None]:
x_MC_si = x_train[0:998]
x_MC_bg = x_train[17100:18098]
x_data = x_real

In [None]:
print(np.mean(x_MC_bg[3]))
print(np.std(x_MC_bg[12]))