In [20]:
#!/usr/bin/env python
# encoding: utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
# Install TensorFlow
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten , Convolution2D, MaxPooling2D , Lambda, Conv2D, Activation,Concatenate
from tensorflow.keras.layers import ActivityRegularization
from tensorflow.keras.optimizers import Adam , SGD , Adagrad
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, CSVLogger, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers , initializers
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import NumpyArrayIterator



gpus = tf.config.experimental.list_physical_devices('GPU')
# if gpus:
#   # Restrict TensorFlow to only use the first GPU
try:
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_virtual_device_configuration(
    gpus[0],
    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10000)])
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
except RuntimeError as e:
# Visible devices must be set before GPUs have been initialized
    print(e)



from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
# from xgboost import XGBClassifier
import tensorflow.keras.backend as K
from sklearn import metrics
from joblib import dump, load

# !pip3 install keras-tuner --upgrade
# !pip3 install autokeras
import kerastuner as kt
import autokeras as ak

# Import local libraries
import numpy as np
import matplotlib.pyplot as plt
import time
import pandas as pd
import importlib
import os
from tqdm import tqdm 

import logging

importlib.reload(logging)
logging.basicConfig(level = logging.INFO)

os.environ['NUMEXPR_MAX_THREADS'] = '64'
os.environ['NUMEXPR_NUM_THREADS'] = '64'

print("Tensorflow Version is {}".format(tf.__version__))
print("Keras Version is {}".format(tf.keras.__version__))
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
tf.device('/device:XLA_GPU:0')
!nvidia-smi

1 Physical GPUs, 1 Logical GPU
Tensorflow Version is 2.4.1
Keras Version is 2.4.0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7014882791185173419
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10485760000
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 5489862345064373668
physical_device_desc: "device: 0, name: GeForce RTX 3090, pci bus id: 0000:c1:00.0, compute capability: 8.6"
]
Tue Sep 21 11:40:24 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.04    Driver Version: 455.23.04    CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3090    

In [8]:
%%time
HOMEPATH = "/dicos_ui_home/alanchung/Universality_Boosetd_Higgs/"

data_train = {
            "herwig_ang_train" : 0,
#             "pythia_def_train" : 0,
#             "pythia_vin_train" : 0,
#         "pythia_dip_train" : 0,
#             "sherpa_def_train" : 0
        }  

for i, element in enumerate(data_train):
#         data_train[element] = pd.read_csv(savepath + "BDT/" + str(element) + ".csv")

    """
    Pt Range Study
    """
    pt_min, pt_max = 300, 500
    tmp = pd.read_csv(HOMEPATH + "Notebook/KFold_BDT/" + str(element) + ".csv")
    tmp = tmp[(tmp["PTJ_0"] >= pt_min)  & (tmp["PTJ_0"] < pt_max)]
    tmp = tmp[(tmp["MJ_0"] >= 110)  & (tmp["MJ_0"] < 160)]
    data_train[element] = shuffle(tmp)



logging.info("All Files are loaded!")

logging.info("H jet : QCD jet = 1 : 1")
logging.info("\r")


train = [ len(data_train[element]) for j, element in enumerate(data_train)]
logging.info("{:^8}{:^15}".format("",str(element)))
logging.info("{:^8}{:^15}".format("Train #",train[0]))


for i, element in enumerate(data_train):
    total_list = data_train[element].columns
    break

logging.info("total_list: {}".format(total_list))





features = ["MJ_0","t21_0","D21_0","D22_0","C21_0","C22_0"] #7/14

BDT_Model_A1 = {
              "herwig_ang" : 0,
#               "pythia_def" : 0, 
#               "pythia_vin" : 0, 
#               "pythia_dip" : 0, 
#               "sherpa_def" : 0,
            }


kf = KFold(n_splits = 100)

for i,(model, trainingdata) in enumerate(zip(BDT_Model_A1, data_train)):

    logging.info("BDT Model: {}  Training Data: {}".format(model, trainingdata))
    
    for model_index, (train_index, val_index) in enumerate(kf.split(data_train[trainingdata]["target"])):
        training_data = data_train[trainingdata].iloc[train_index]
        validation_data = data_train[trainingdata].iloc[val_index]

        filepath = "./Pythia_dip_KFold/"+str(model)+"_Models_"+str(int(pt_min))+str(int(pt_max))+"/" + str(model) + "_BDT_"+str(model_index)+ ".h5"
        if os.path.exists(filepath):
            BDT_Model_A1[model] = load_model(filepath)
            logging.info(str(model) + " BDT model 1 is loaded!")
            logging.info("######################################################################################")
            logging.info("\n")
            
            
        else:
            raise ValueError("Pleas check model path !!")

INFO:root:All Files are loaded!
INFO:root:H jet : QCD jet = 1 : 1
INFO:root:
INFO:root:        herwig_ang_train
INFO:root:Train #     268760     
INFO:root:total_list: Index(['GEN', 'SHO', 'PRO', 'MJ_0', 'PTJ_0', 'eta_0', 'phi_0', 't21_0',
       'D21_0', 'D22_0', 'C21_0', 'C22_0', 'MJ', 'PTJ', 'eta', 'phi', 't21',
       'D21', 'D22', 'C21', 'C22', 'weight', 'eventindex', 'WEIGHT', 'index',
       'target'],
      dtype='object')
INFO:root:BDT Model: herwig_ang  Training Data: herwig_ang_train


ValueError: Pleas check model path !!

In [29]:
%%time
HOMEPATH = "/dicos_ui_home/alanchung/Universality_Boosetd_Higgs/"
Data_High_Level_Features_path =  HOMEPATH + "Data_High_Level_Features/"
savepath = HOMEPATH + "Data_ML/"



data_train = {
        "herwig_ang_train" : 0,
        "pythia_def_train" : 0,
        "pythia_vin_train" : 0,
        "pythia_dip_train" : 0,
#             "sherpa_def_train" : 0
        }  

data_test = {
        "herwig_ang_test" : 0,
        "pythia_def_test" : 0,
        "pythia_vin_test" : 0,
        "pythia_dip_test" : 0,
#             "sherpa_def_test" : 0
        }  


BDT_Model_A1 = {
      "herwig_ang" : 0,
      "pythia_def" : 0, 
      "pythia_vin" : 0, 
      "pythia_dip" : 0, 
#       "sherpa_def" : 0,
    }

       
for i, model in enumerate(BDT_Model_A1): 
    logging.info("BDT Model: {}".format(model))

    for j, traindata in enumerate(data_train):


        """
        Pt Range Study
        """
    #         for k, (pt_min, pt_max) in  enumerate(zip([300,400,500,600,700,800],[400,500,600,700,800,900])):
        for k, (pt_min, pt_max) in  enumerate(zip([300],[500])):

            train_data_path = HOMEPATH + "Notebook/KFold_BDT/" + str(traindata) + ".csv"
            if os.path.exists(train_data_path):
                tmp = pd.read_csv(train_data_path)
            else:
                raise ValueError("Pleas check training data path !!")
                
            tmp = tmp[(tmp["PTJ_0"] >= pt_min)  & (tmp["PTJ_0"] < pt_max)]
#             tmp = tmp[(tmp["PTJ_0"] >= 300)  & (tmp["PTJ_0"] < 500)]
            tmp = tmp[(tmp["MJ_0"] >= 110)  & (tmp["MJ_0"] < 160)]
            data_train[traindata] = shuffle(tmp)#[:30000]


#             test_data_path = HOMEPATH + "Notebook/KFold_BDT/" + str(testdata) + ".csv"
#             if os.path.exists(test_data_path):
#                 tmp = pd.read_csv(test_data_path)
#             else:
#                 raise ValueError("Pleas check test data path !!")

#             tmp = tmp[(tmp["PTJ_0"] >= pt_min)  & (tmp["PTJ_0"] < pt_max)]
#             tmp = tmp[(tmp["MJ_0"] >= 110)  & (tmp["MJ_0"] < 160)]
#             data_test[testdata] = shuffle(tmp)
            
            logging.info("START===========================================START")
            logging.info("\r") 
            logging.info("All Files are loaded!")
            logging.info("pt min: {} , pt max: {}".format(pt_min, pt_max))
            logging.info("\r")
            logging.info("H jet : QCD jet = 1 : 1")

            train = len(data_train[traindata])
#             test = len(data_test[testdata])
        #     logging.info("{:^8}{:^15}{:^15}{:^15}{:^15}".format("","Herwig Angular","Pythia Default","Pythia Vincia","Pythia Dipole"))
        #     logging.info("{:^8}{:^15}{:^15}{:^15}{:^15}".format("Train #",train[0],train[1],train[2],train[3]))
    #         logging.info("{:^8}{:^15}".format("",str(testdata)))
            logging.info("{:^8}{:^15}".format("Train #",train))
#             logging.info("{:^8}{:^15}".format("Test #",test))






            features = ["MJ_0","t21_0","D21_0","D22_0","C21_0","C22_0"] 


            kf = KFold(n_splits = 100)
            # kf = GroupKFold(n_splits = 100)
#             skf = StratifiedKFold(n_splits = 2, random_state = 7, shuffle = True) 

#             model = "pythia_def"

            logging.info("BDT Model: {}  Test Data: {}".format(model, traindata))
            time.sleep(0.5)
            for model_index, (train_index, val_index) in tqdm(enumerate(kf.split(data_train[traindata]["target"]))):
                ticks_1 = time.time()

    #             logging.info("TRAIN: {} VAL: {}".format(train_index, val_index))
    #                 training_data = data_train[traindata].iloc[train_index]
                validation_data = data_train[traindata].iloc[val_index]
#                 logging.info(len(validation_data[validation_data["target"]==0]))
    #             validation_data = data_train[traindata]
    #                 validation_data = data_test[testdata]

        #         print(len(training_data[training_data["target"]==1])/len(training_data))
        #         print(len(validation_data[validation_data["target"]==0])/len(validation_data))
        #         print(len(training_data)/len(data_train[trainingdata]))
        #         print(len(validation_data)/len(data_train[trainingdata]))
        #         print(model_index)

                Performance_Frame = {
                "AUC" : [0],
                "max_sig" : [0],
                "r05" : [0],
                "time": [0]
                }


                filepath = "./"+str(model)+"_KFold/BDT_"+str(model)+"_Models_"+str(int(pt_min))+str(int(pt_max))+"/" + str(model) + "_BDT_"+str(model_index)+ ".h5"
                if os.path.exists(filepath):
    #                     BDTModel_A1[model] = load_model(filepath)

                    model_BDT = load(filepath)

                    prediction_test =  model_BDT.predict_proba(np.asarray(validation_data[features]))[:,1]
                    discriminator_test = prediction_test
                    discriminator_test = discriminator_test/(max(discriminator_test))

                    Performance_Frame["AUC"][0] = metrics.roc_auc_score(validation_data["target"], discriminator_test)
                    FalsePositiveFull, TruePositiveFull, _ = metrics.roc_curve(validation_data["target"], discriminator_test)
                    tmp = np.where(FalsePositiveFull != 0)
                    Performance_Frame["max_sig"][0] = max(TruePositiveFull[tmp]/np.sqrt(FalsePositiveFull[tmp])) 
                    tmp = np.where(TruePositiveFull >= 0.5)
                    Performance_Frame["r05"][0]= 1./FalsePositiveFull[tmp[0][0]]

                    Performance_Frame["time"][0] = (time.time() - ticks_1)/60.

                    dataframe = pd.DataFrame(Performance_Frame)



                    csvdata_path = "./"+str(model)+"_KFold/BDT_"+str(model)+"_Performance_Table_to_"+str(traindata)+"_"+str(pt_min)+str(pt_max)+".csv"
                    if os.path.exists(csvdata_path):
                        save_to_csvdata = pd.read_csv(csvdata_path)
                        DATA = pd.concat([save_to_csvdata, dataframe], ignore_index=True, axis=0,join='inner')
                        DATA.to_csv(csvdata_path, index = 0)

                    else:
                        dataframe.to_csv(csvdata_path, index = 0)


                else:
                    raise ValueError("Pleas check model path !!")


            logging.info("There are {} events for each test.".format(len(validation_data)))
            logging.info("{} BDT models apply on {} is finished!!".format(model,traindata))
            logging.info("END===========================================END")
            logging.info("\n")


    logging.info("\n")
    logging.info("\n")


INFO:root:BDT Model: herwig_ang
INFO:root:
INFO:root:All Files are loaded!
INFO:root:pt min: 300 , pt max: 500
INFO:root:
INFO:root:H jet : QCD jet = 1 : 1
INFO:root:Train #     268760     
INFO:root:BDT Model: herwig_ang  Test Data: herwig_ang_train
100it [00:11,  9.08it/s]
INFO:root:There are 2687 events for each test.
INFO:root:herwig_ang BDT models apply on herwig_ang_train is finished!!
INFO:root:

INFO:root:
INFO:root:All Files are loaded!
INFO:root:pt min: 300 , pt max: 500
INFO:root:
INFO:root:H jet : QCD jet = 1 : 1
INFO:root:Train #     268760     
INFO:root:BDT Model: herwig_ang  Test Data: pythia_def_train
100it [00:10,  9.94it/s]
INFO:root:There are 2687 events for each test.
INFO:root:herwig_ang BDT models apply on pythia_def_train is finished!!
INFO:root:

INFO:root:
INFO:root:All Files are loaded!
INFO:root:pt min: 300 , pt max: 500
INFO:root:
INFO:root:H jet : QCD jet = 1 : 1
INFO:root:Train #     268760     
INFO:root:BDT Model: herwig_ang  Test Data: pythia_vin_train

CPU times: user 2min 49s, sys: 1.97 s, total: 2min 51s
Wall time: 3min 3s


In [32]:
%%time
BDT_Model_A1 = {
              "herwig_ang" : 0,
#               "pythia_def" : 0, 
#               "pythia_vin" : 0, 
#               "pythia_dip" : 0, 
#               "sherpa_def" : 0,
            }

Performance_Frame = {
            "herwig_ang_train" : 0,
            "pythia_def_train" : 0,
            "pythia_vin_train" : 0,
            "pythia_dip_train" : 0,
        }  

        
# index=["Herwig Angular", "Pythia Default", "Pythia Vincia", "Pythia Dipole", "Average", "STD"]

for i, model in enumerate(BDT_Model_A1): 
    logging.info("BDT Model: {} ".format(model))
    

    
    for j, element in enumerate(Performance_Frame): 
        
        Performance = {
                    "AUC" : [0,0,0,0,0,0],
                    "AUC std" : [0,0,0,0,0,0],
                    "max_sig" : [0,0,0,0,0,0],
                    "max_sig std" : [0,0,0,0,0,0],
                    "r05" : [0,0,0,0,0,0],
                    "r05 std" : [0,0,0,0,0,0],
                    "time": [0,0,0,0,0,0],
                    "time std" : [0,0,0,0,0,0],
                    }

    
#         for k, (pt_min, pt_max) in  enumerate(zip([300,400,500,600,700,800],[400,500,600,700,800,900])):
        for k, (pt_min, pt_max) in  enumerate(zip([300],[500])):
            Performance_Table = pd.read_csv("./"+str(model)+"_KFold/BDT_"+str(model)+"_Performance_Table_to_"+str(element)+"_"+str(pt_min)+str(pt_max)+".csv")
#             Performance_Table = pd.read_csv("./"+str(model)+"_KFold/BDT"+str(model)+"_Performance_Table_to_"+str(element)+"_"+str(pt_min)+str(pt_max)+".csv")
            
            logging.info(len(Performance_Table))

            Performance_Table.drop(Performance_Table[Performance_Table["AUC"] <= 0.7].index, inplace=True)
            Performance_Table.drop(Performance_Table[Performance_Table["AUC"] == 1].index, inplace=True)
            Performance["AUC"][k] = Performance_Table["AUC"].mean()
            Performance["AUC std"][k] = Performance_Table["AUC"].std()
            Performance["max_sig"][k] = Performance_Table["max_sig"].mean()
            Performance["max_sig std"][k] = Performance_Table["max_sig"].std()
            Performance["r05"][k] = Performance_Table["r05"].mean()
            Performance["r05 std"][k] = Performance_Table["r05"].std()
            Performance["time"][k] = Performance_Table["time"].mean()
            Performance["time std"][k] = Performance_Table["time"].std()



#         Performance_Frame[element] = pd.DataFrame(Performance,
#                      index=["pt300400","pt400500","pt500600","pt600700","pt700800","pt800900"]
#                     )
        
        Performance_Frame[element] = pd.DataFrame(Performance,
                     index=["pt3001100","null","null","null","null","null"]
                    )

INFO:root:BDT Model: herwig_ang 
INFO:root:100
INFO:root:100
INFO:root:100
INFO:root:100


CPU times: user 56.9 ms, sys: 3.99 ms, total: 60.9 ms
Wall time: 60.4 ms


In [33]:
Performance_Frame["herwig_ang_train"]

Unnamed: 0,AUC,AUC std,max_sig,max_sig std,r05,r05 std,time,time std
pt3001100,0.820742,0.008435,2.42764,0.614893,11.452498,1.395127,0.001678,0.000558
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
Performance_Frame["pythia_def_train"]

Unnamed: 0,AUC,AUC std,max_sig,max_sig std,r05,r05 std,time,time std
pt3001100,0.838375,0.00772,2.632908,0.621528,15.824896,1.726439,0.001546,0.000429
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
Performance_Frame["pythia_vin_train"]

Unnamed: 0,AUC,AUC std,max_sig,max_sig std,r05,r05 std,time,time std
pt3001100,0.851265,0.00724,4.579889,1.569768,22.259458,4.10604,0.001464,0.000379
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
Performance_Frame["pythia_dip_train"]

Unnamed: 0,AUC,AUC std,max_sig,max_sig std,r05,r05 std,time,time std
pt3001100,0.854289,0.006859,3.298941,0.669124,19.68668,2.966062,0.001482,0.000398
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
metric = "AUC"

print(np.mean([Performance_Frame["herwig_ang_train"][metric],Performance_Frame["pythia_def_train"][metric],Performance_Frame["pythia_vin_train"][metric],Performance_Frame["pythia_dip_train"][metric]], axis=0))
print(np.std([Performance_Frame["herwig_ang_train"][metric],Performance_Frame["pythia_def_train"][metric],Performance_Frame["pythia_vin_train"][metric],Performance_Frame["pythia_dip_train"][metric]], axis=0))



[0.84116783 0.         0.         0.         0.         0.        ]
[0.01322063 0.         0.         0.         0.         0.        ]


In [38]:
np.mean([0.854 ,0.860 ,0.862 ,0.865])

0.86025

In [39]:
np.std([0.854 ,0.860 ,0.862 ,0.865])

0.004023369234857774

In [41]:
%%time
BDT_Model_A1 = {
              "herwig_ang" : 0,
              "pythia_def" : 0, 
              "pythia_vin" : 0, 
              "pythia_dip" : 0, 
#               "sherpa_def" : 0,
            }

Performance_Frame = {
#             "herwig_ang_train" : 0,
#             "pythia_def_train" : 0,
#             "pythia_vin_train" : 0,
            "pythia_dip_train" : 0,
        }  

        
# index=["Herwig Angular", "Pythia Default", "Pythia Vincia", "Pythia Dipole", "Average", "STD"]


    

    
for j, element in enumerate(Performance_Frame): 
        
    for i, model in enumerate(BDT_Model_A1): 
        logging.info("BDT Model: {} ".format(model))
        
        Performance = {
                    "AUC" : [0,0,0,0,0,0],
                    "AUC std" : [0,0,0,0,0,0],
                    "max_sig" : [0,0,0,0,0,0],
                    "max_sig std" : [0,0,0,0,0,0],
                    "r05" : [0,0,0,0,0,0],
                    "r05 std" : [0,0,0,0,0,0],
                    "time": [0,0,0,0,0,0],
                    "time std" : [0,0,0,0,0,0],
                    }

    
#         for k, (pt_min, pt_max) in  enumerate(zip([300,400,500,600,700,800],[400,500,600,700,800,900])):
        for k, (pt_min, pt_max) in  enumerate(zip([300],[500])):
            Performance_Table = pd.read_csv("./"+str(model)+"_KFold/BDT_"+str(model)+"_Performance_Table_to_"+str(element)+"_"+str(pt_min)+str(pt_max)+".csv")
#             Performance_Table = pd.read_csv("./"+str(model)+"_KFold/BDT"+str(model)+"_Performance_Table_to_"+str(element)+"_"+str(pt_min)+str(pt_max)+".csv")
            
            logging.info(len(Performance_Table))

            Performance_Table.drop(Performance_Table[Performance_Table["AUC"] <= 0.7].index, inplace=True)
            Performance_Table.drop(Performance_Table[Performance_Table["AUC"] == 1].index, inplace=True)
            Performance["AUC"][k] = Performance_Table["AUC"].mean()
            Performance["AUC std"][k] = Performance_Table["AUC"].std()
            Performance["max_sig"][k] = Performance_Table["max_sig"].mean()
            Performance["max_sig std"][k] = Performance_Table["max_sig"].std()
            Performance["r05"][k] = Performance_Table["r05"].mean()
            Performance["r05 std"][k] = Performance_Table["r05"].std()
            Performance["time"][k] = Performance_Table["time"].mean()
            Performance["time std"][k] = Performance_Table["time"].std()



#         Performance_Frame[element] = pd.DataFrame(Performance,
#                      index=["pt300400","pt400500","pt500600","pt600700","pt700800","pt800900"]
#                     )
        
        BDTModel_A1[model] = pd.DataFrame(Performance,
                     index=["pt3001100","null","null","null","null","null"]
                    )

INFO:root:BDT Model: herwig_ang 
INFO:root:100
INFO:root:BDT Model: pythia_def 
INFO:root:100
INFO:root:BDT Model: pythia_vin 
INFO:root:100
INFO:root:BDT Model: pythia_dip 
INFO:root:100


CPU times: user 54.4 ms, sys: 5.08 ms, total: 59.5 ms
Wall time: 59.3 ms


In [42]:
metric = "AUC"

print(np.mean([BDTModel_A1["herwig_ang"][metric],BDTModel_A1["pythia_def"][metric],BDTModel_A1["pythia_vin"][metric],BDTModel_A1["pythia_dip"][metric]], axis=0))
print(np.std([BDTModel_A1["herwig_ang"][metric],BDTModel_A1["pythia_def"][metric],BDTModel_A1["pythia_vin"][metric],BDTModel_A1["pythia_dip"][metric]], axis=0))



[0.8606511 0.        0.        0.        0.        0.       ]
[0.00499525 0.         0.         0.         0.         0.        ]
