In [None]:
import os

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from lib import *


# Input samples as (csv path, label, type name) triples, in load order.
# Only the tH sample carries label 1; every other sample carries label 0.
# NOTE(review): label 1 presumably marks signal and 0 background — confirm
# against CEventsTable.appendFromCsv in lib.
_SAMPLES = [
    ("ivan_ntuples/tH.csv", 1, "tH"),
    ("ivan_ntuples/ttb.csv", 0, "ttb"),
    ("ivan_ntuples/ttc.csv", 0, "ttc"),
    ("ivan_ntuples/ttL.csv", 0, "ttL"),
    ("nazim_ntuples/ttH.csv", 0, "ttH"),
    ("nazim_ntuples/ttZ.csv", 0, "ttZ"),
    ("ivan_ntuples/ttW.csv", 0, "ttW"),
    ("ivan_ntuples/tZq.csv", 0, "tZq"),
    ("ivan_ntuples/tWZ.csv", 0, "tWZ"),
    ("ivan_ntuples/tW.csv", 0, "single_tW"),
    # Two files feed the same "single_tt" type.
    ("ivan_ntuples/single_t1.csv", 0, "single_tt"),
    ("ivan_ntuples/single_t2.csv", 0, "single_tt"),
    ("ivan_ntuples/single_ts.csv", 0, "single_ts"),
    ("nazim_ntuples/WZ.csv", 0, "WZ"),
    # Two files feed the same "VV" type.
    ("nazim_ntuples/VV1.csv", 0, "VV"),
    ("nazim_ntuples/VV2.csv", 0, "VV"),
    ("ivan_ntuples/data.csv", 0, "nonp"),
]

# Build the event table by appending every sample in the order listed above.
t = CEventsTable()
for _csv_path, _label, _type_name in _SAMPLES:
    t.appendFromCsv(_csv_path, _label, _type_name)

# Report per-type counts at each stage so the effect of every step is visible.
print('Before preselection:')
t.printTypesNumbers()

t.applyPreselection()
print('After preselection:')
t.printTypesNumbers()

# The header is printed before calculateWeights() so any output that call
# produces appears under it.
print('With weights:')
t.calculateWeights()
t.printTypesNumbers()
    


# Display settings: no scientific notation for numpy, 8 decimals for pandas.
np.set_printoptions(suppress=True)
pd.set_option("display.float_format", '{:.8f}'.format)

# Build the sampled dataset from the event table and standardize it.
# NOTE(review): the meaning of the two (20000, 100000) tuples is defined by
# CSampledDatasetExpEvents in lib — presumably per-class sample sizes for the
# train and validation sets; confirm there.
# Alternatives tried earlier:
#   mld = CCreateDfRatioAll(t.table, 0.7, create_val = False)
#   mld = CSampledDatasetAcc(t.table, (20000,100000), (10000,100000))
mld = CSampledDatasetExpEvents(t.table, (20000, 100000), (20000, 100000))
mld.createStandartizedDatasets()
mld.printInfo()

# Training split (standardized features, labels, event types, weights).
X_train, y_train, types_train, weights_train = (
    mld.X_train_s,
    mld.y_train,
    mld.types_train,
    mld.weights_train,
)

# The "test" names below are aliases of the dataset's *validation* split.
X_test, y_test, types_test, weights_test = (
    mld.X_val_s,
    mld.y_val,
    mld.types_val,
    mld.weights_val,
)

# Shared evaluators used by the objective to score predictions.
AucCalc = CEvaluationAUC()
SigCalc = CEvaluationSignificance()




In [None]:

class CObjective:
    """Optuna objective: train a dense Keras classifier and log every trial.

    Each call samples an architecture (number of layers and, per layer,
    activation, width and dropout), a learning rate and a loss function from
    the given trial, fits the network on the module-level ``X_train`` /
    ``y_train``, scores it on ``X_test`` / ``y_test`` / ``weights_test`` and
    appends one row describing the trial to a CSV file.

    Attributes:
        filepath: Path of the CSV file the trial log is written to.
        df: DataFrame holding one row per completed trial.
        model: The Keras model trained by the most recent call (None before
            the first call).
    """

    def __init__(self, filepath):
        """Create the objective and write an empty trial log to ``filepath``.

        Args:
            filepath: Destination of the per-trial CSV log. The file is
                written immediately, so existing content is overwritten.
        """
        self.filepath = filepath
        self.df = pd.DataFrame()
        self.df.to_csv(self.filepath, index=False)
        self.model = None

    def __call__(self, trial):
        """Run one trial and return the AUC on the held-out set.

        Args:
            trial: Optuna trial used to sample the hyperparameters.

        Returns:
            The value returned by ``AucCalc.evaluate`` for the predictions on
            ``X_test`` (the quantity the study optimizes).
        """
        start = time.time()

        # Many models are built in one process; release the global Keras state
        # of previous trials so memory does not grow and auto-generated
        # layer/metric names start from scratch.
        tf.keras.backend.clear_session()
        tf.random.set_seed(10)

        model = Sequential()

        init = tf.keras.initializers.GlorotNormal(seed=10)

        n_layers = trial.suggest_int('n_layers', 1, 10)

        # One [activation, n_units, dropout] entry per hidden layer, kept for
        # the CSV log.
        layer_info = []
        for i in range(n_layers):
            activation = trial.suggest_categorical("activation_l{}".format(i), ['tanh', 'relu', 'sigmoid'])
            n_units = trial.suggest_int("n_units_l{}".format(i), 1, 150)

            model.add(Dense(n_units, kernel_initializer=init, activation=activation))

            dropout = trial.suggest_float("dropout_l{}".format(i), 0.0, 0.5)
            model.add(Dropout(dropout))

            layer_info.append([activation, n_units, dropout])

        model.add(Dense(1, kernel_initializer=init, activation='sigmoid'))

        # NOTE(review): lr is sampled uniformly on [1e-5, 1e-1]; log=True would
        # cover the small values better. Left unchanged to keep the search
        # space identical.
        lr = trial.suggest_float("lr", 1e-5, 1e-1)

        optimizer = Adam(learning_rate=lr)

        loss_f = trial.suggest_categorical("loss_f", ['mse', 'binary_crossentropy', 'binary_focal_crossentropy'])

        # Name the metric explicitly: with the bare string 'AUC' Keras may
        # auto-number the metric (auc_1, auc_2, ...) for later models, and the
        # 'auc' early-stopping monitor below would then never match.
        model.compile(loss=loss_f, optimizer=optimizer,
                      metrics=[tf.keras.metrics.AUC(name='auc')])

        # Both callbacks monitor *training* quantities (no validation data is
        # passed to fit). Directions are stated explicitly instead of relying
        # on name-based inference.
        callback_loss = EarlyStopping(monitor='loss', mode='min', patience=4)
        callback_auc = EarlyStopping(monitor='auc', mode='max', patience=4)
        history = model.fit(X_train, y_train, epochs=50, batch_size=100,
                            callbacks=[callback_auc, callback_loss], verbose=1)
        self.model = model

        # Score the trained model; predict() output is reduced to its first
        # column before evaluation.
        preds_train = model.predict(X_train)[:, 0]
        preds_test = model.predict(X_test)[:, 0]

        auc_train = AucCalc.evaluate(preds_train, y_train)
        auc_test = AucCalc.evaluate(preds_test, y_test)
        significance, thr, sig, bg = SigCalc.evaluate(preds_test, y_test, weights_test)

        end = time.time()
        elapsed_time = end - start

        # Early stopping may end training before the 50-epoch cap.
        n_epochs = len(history.history['loss'])

        print('n_epochs:', n_epochs)
        print('auc train:', auc_train)
        print('auc test:', auc_test)
        print('best significance:', significance)
        print('time:', elapsed_time)

        line = pd.DataFrame({
            'n_layers': [n_layers], 'lr': [lr], 'loss_f': [loss_f],
            'n_epochs': [n_epochs], 'auc_train': [auc_train], 'auc_test': [auc_test],
            'sig_test': [significance], 'thr': [thr], 'n_sig': [sig], 'n_bg': [bg],
            'time': [elapsed_time]})

        # Per-layer columns; trials with fewer layers leave the columns of
        # deeper layers empty after concatenation.
        for i in range(len(layer_info)):
            line['l' + str(i) + '_activation'] = layer_info[i][0]
            line['l' + str(i) + '_nunits'] = layer_info[i][1]
            line['l' + str(i) + '_dropout'] = layer_info[i][2]

        # ignore_index gives every trial its own row label instead of a
        # column of zeros. The CSV is rewritten after each trial so results
        # survive an interrupted run.
        self.df = pd.concat([self.df, line], ignore_index=True)
        self.df.to_csv(self.filepath, index=False)

        return auc_test

    


# Hyperparameter search, run as a sequence of rounds. After each round the
# study is dumped to disk, and the next round starts from that dump.
N_ROUNDS = 20            # number of optimize/dump rounds
TRIALS_PER_ROUND = 100   # Optuna trials per round
OUTPUT_DIR = "nn_nsga"   # holds the study dumps and per-round trial logs

# CObjective writes its CSV on construction and joblib.dump writes the study;
# both fail if the directory is missing, so create it up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for i in range(N_ROUNDS):
    if i == 0:
        sampler = optuna.samplers.NSGAIISampler(seed=11)
        # Alternative sampler:
        #   optuna.samplers.TPESampler(consider_prior = True, seed=11 )
        study = optuna.create_study(direction='maximize', sampler=sampler)
    else:
        # study_filename still names the dump written by the previous round.
        # NOTE: joblib.load unpickles the file; only load dumps you created.
        study = joblib.load(study_filename)

    study_filename = OUTPUT_DIR + "/study" + str(i)
    df_filename = OUTPUT_DIR + "/df" + str(i)

    # Fresh objective per round: each round gets its own trial log.
    obj = CObjective(df_filename)
    study.optimize(obj, n_trials=TRIALS_PER_ROUND)

    joblib.dump(study, study_filename)
    
