![](figures/sherpa-logo.png)

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import sherpa
import pprint
import energyflow
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

pp = pprint.PrettyPrinter(indent=4)
INFO = "\033[96m" + "[INFO]" + "\033[00m\t"

In [2]:
!rm -rf Results
os.makedirs("Results/Models", exist_ok=True)

## Load Data

In [3]:
def load_data(args):
    X, Y = energyflow.qg_nsubs.load(num_data=-1, cache_dir='~/.energyflow')
    
    if args["preprocessing"] == "log":
        X = np.log(X)
    
    elif args["preprocessing"] == "standardize":
        X = (X - X.mean(axis=0)) / X.std(axis=0)
    
    elif args["preprocessing"] == "min_max":
        X = (X - X.min(axis=0)) / X.max(axis=0)
    
    return X, Y

## Build the Model

In [4]:
def build_model(args):
    # Create the input layer to the network
    x = model_input = tf.keras.layers.Input(shape=(45,))
    
    # Create a series of fully connected layers
    for i in range(args["number_of_layers"]):
        
        if args["batch_normalization"]:
            x = tf.keras.layers.BatchNormalization()(x)
        
        # Create a dense layer 
        x = tf.keras.layers.Dense(
            units=args["number_of_nodes"], 
            activation=args["activation"]
        )(x)
        
        # Dropout layers with probablity 
        x = tf.keras.layers.Dropout(args["dropout"])(x)
        
    # Create the final layer in the network
    model_output = tf.keras.layers.Dense(1, activation="sigmoid")(x)
    
    # Build the model graph
    model = tf.keras.Model(
        inputs=model_input, 
        outputs=model_output
    )
    
    # Compile the model with loss and optimizer
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=args["learning_rate"]),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=[
            "accuracy", 
            tf.keras.metrics.AUC(name="auc"),
            tf.keras.metrics.TruePositives(name="true_positives"),
            tf.keras.metrics.FalsePositives(name="false_positives"),
        ]
    )
    
    return model

## Model Callbacks During Training

In [5]:
def get_callbacks(args, study, trial, monitor="val_loss", patience=25):
    callbacks = [
        study.keras_callback(
            trial, 
            objective_name=monitor,
            context_names=["accuracy", "val_accuracy", "auc", "val_auc",
                          "true_positives", "val_true_positives", 
                          "false_positives", "val_false_positives"]
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor=monitor,
            patience=patience
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor=monitor,
            factor=args['learning_rate_decay'],
            patience=patience,
            verbose=1
        ),
        tf.keras.callbacks.ModelCheckpoint(
            "Results/Models/%05d" % trial.id,
            save_best_only=True,
            monitor=monitor
        )
    ]
    
    return callbacks

## Hyperpameters of Interest
![](figures/parameter_types.png)

In [6]:
# Define all the hyperparameters and their corresponding ranges
parameters = [
    sherpa.Choice("activation", ["relu", "elu"]),
    sherpa.Choice("batch_normalization", [0, 1]),
    sherpa.Continuous("dropout", [0, 1]),
    sherpa.Continuous("learning_rate_decay", [0., 0.5]),
    sherpa.Continuous("learning_rate", [0.0001, 0.1]),
    sherpa.Discrete("number_of_nodes", [32, 256]),
    sherpa.Discrete("number_of_layers", [3, 25]),
    sherpa.Choice("preprocessing", ["log", "min_max", "none", "standardize"]),
]

## Hyperparameter Search Algorithm
![](figures/available_algorithms.png)

In [7]:
# Define the search algorithm to use
alg = sherpa.algorithms.RandomSearch(max_num_trials=5)

In [8]:
study = sherpa.Study(
    parameters=parameters,
    algorithm=alg,
    lower_is_better=True,
    output_dir="Results"
)

INFO:sherpa.core:
-------------------------------------------------------
SHERPA Dashboard running. Access via
http://127.0.1.1:8880 if on a cluster or
http://localhost:8880 if running locally.
-------------------------------------------------------


 * Serving Flask app "sherpa.app.app" (lazy loading)
 * Environment: production
 * Debug mode: on
[2m   Use a production WSGI server instead.[0m


## Searching

In [9]:
for trial in study:
    
    # Print the hyperparameters for this trial
    print()
    print("=" * 100)
    print(INFO, "Trial #:", trial.id)
    pp.pprint(trial.parameters)
    
    
    # Load data from energyflow package and preprocess it
    X, Y = load_data(trial.parameters)
    

    # Train, Validation, Test Split
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=1)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=1/9, random_state=1) # 0.11 x 0.9 = 0.1


    print(INFO, "Number of Training Samples:", X_train.shape[0])
    print(INFO, "Number of Validation Samples:", X_val.shape[0])
    print(INFO, "Number of Testing Samples:", X_test.shape[0])

    
    # Construct the model
    model = build_model(trial.parameters)
    
    
    try:
        # Train the model 
        model.fit(
            x=X_train, 
            y=Y_train,
            epochs=2,
            verbose=2,
            batch_size=1024,
            validation_data=(X_val, Y_val),
            callbacks=get_callbacks(trial.parameters, study, trial)
        )
        
        # Complete the trial
        study.finalize(trial)
    except:
        pass

# Save the results
study.save()


[96m[INFO][00m	 Trial #: 1
{   'activation': 'elu',
    'batch_normalization': 0,
    'dropout': 0.8099540848583364,
    'learning_rate': 0.024632178917560567,
    'learning_rate_decay': 0.40982465648764155,
    'number_of_layers': 18,
    'number_of_nodes': 173,
    'preprocessing': 'standardize'}
[96m[INFO][00m	 Number of Training Samples: 80000
[96m[INFO][00m	 Number of Validation Samples: 10000
[96m[INFO][00m	 Number of Testing Samples: 10000
Epoch 1/2


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: Results/Models/00001/assets


79/79 - 6s - loss: 1969504.5000 - accuracy: 0.5018 - auc: 0.5013 - true_positives: 18898.0000 - false_positives: 18846.0000 - val_loss: 1.5561 - val_accuracy: 0.4949 - val_auc: 0.5000 - val_true_positives: 0.0000e+00 - val_false_positives: 0.0000e+00
Epoch 2/2


INFO:tensorflow:Assets written to: Results/Models/00001/assets


79/79 - 6s - loss: 7195957760.0000 - accuracy: 0.4972 - auc: 0.4977 - true_positives: 17316.0000 - false_positives: 17633.0000 - val_loss: 0.8650 - val_accuracy: 0.4949 - val_auc: 0.5000 - val_true_positives: 0.0000e+00 - val_false_positives: 0.0000e+00

[96m[INFO][00m	 Trial #: 2
{   'activation': 'relu',
    'batch_normalization': 1,
    'dropout': 0.2946499869165903,
    'learning_rate': 0.0823779055345154,
    'learning_rate_decay': 0.40555561374685206,
    'number_of_layers': 20,
    'number_of_nodes': 240,
    'preprocessing': 'none'}
[96m[INFO][00m	 Number of Training Samples: 80000
[96m[INFO][00m	 Number of Validation Samples: 10000
[96m[INFO][00m	 Number of Testing Samples: 10000
Epoch 1/2


INFO:tensorflow:Assets written to: Results/Models/00002/assets


79/79 - 14s - loss: 0.8292 - accuracy: 0.5013 - auc: 0.5043 - true_positives: 19245.0000 - false_positives: 19234.0000 - val_loss: 225.7277 - val_accuracy: 0.4949 - val_auc: 0.5000 - val_true_positives: 0.0000e+00 - val_false_positives: 0.0000e+00
Epoch 2/2


INFO:tensorflow:Assets written to: Results/Models/00002/assets


79/79 - 13s - loss: 0.6913 - accuracy: 0.5041 - auc: 0.5058 - true_positives: 31102.0000 - false_positives: 30865.0000 - val_loss: 0.6932 - val_accuracy: 0.4949 - val_auc: 0.5000 - val_true_positives: 0.0000e+00 - val_false_positives: 0.0000e+00

[96m[INFO][00m	 Trial #: 3
{   'activation': 'relu',
    'batch_normalization': 0,
    'dropout': 0.4169923429959491,
    'learning_rate': 0.08043395342122629,
    'learning_rate_decay': 0.19768252638510087,
    'number_of_layers': 9,
    'number_of_nodes': 111,
    'preprocessing': 'standardize'}
[96m[INFO][00m	 Number of Training Samples: 80000
[96m[INFO][00m	 Number of Validation Samples: 10000
[96m[INFO][00m	 Number of Testing Samples: 10000
Epoch 1/2


INFO:tensorflow:Assets written to: Results/Models/00003/assets


79/79 - 3s - loss: 10.6682 - accuracy: 0.5006 - auc: 0.5001 - true_positives: 9988.0000 - false_positives: 10029.0000 - val_loss: 0.6931 - val_accuracy: 0.5051 - val_auc: 0.5000 - val_true_positives: 5051.0000 - val_false_positives: 4949.0000
Epoch 2/2
79/79 - 1s - loss: 0.8733 - accuracy: 0.5000 - auc: 0.4982 - true_positives: 11296.0000 - false_positives: 11387.0000 - val_loss: 0.6931 - val_accuracy: 0.5051 - val_auc: 0.5000 - val_true_positives: 5051.0000 - val_false_positives: 4949.0000

[96m[INFO][00m	 Trial #: 4
{   'activation': 'relu',
    'batch_normalization': 0,
    'dropout': 0.4986027045469802,
    'learning_rate': 0.09818392955886081,
    'learning_rate_decay': 0.10882831558918499,
    'number_of_layers': 5,
    'number_of_nodes': 184,
    'preprocessing': 'none'}
[96m[INFO][00m	 Number of Training Samples: 80000
[96m[INFO][00m	 Number of Validation Samples: 10000
[96m[INFO][00m	 Number of Testing Samples: 10000
Epoch 1/2


INFO:tensorflow:Assets written to: Results/Models/00004/assets


79/79 - 2s - loss: 4.0080 - accuracy: 0.4992 - auc: 0.4989 - true_positives: 19913.0000 - false_positives: 20065.0000 - val_loss: 0.6932 - val_accuracy: 0.4949 - val_auc: 0.5000 - val_true_positives: 0.0000e+00 - val_false_positives: 0.0000e+00
Epoch 2/2
79/79 - 1s - loss: 0.6943 - accuracy: 0.5013 - auc: 0.4995 - true_positives: 18449.0000 - false_positives: 18435.0000 - val_loss: 0.6940 - val_accuracy: 0.4949 - val_auc: 0.5000 - val_true_positives: 0.0000e+00 - val_false_positives: 0.0000e+00

[96m[INFO][00m	 Trial #: 5
{   'activation': 'relu',
    'batch_normalization': 1,
    'dropout': 0.36362886974984754,
    'learning_rate': 0.003565596742209928,
    'learning_rate_decay': 0.49634628911295353,
    'number_of_layers': 4,
    'number_of_nodes': 214,
    'preprocessing': 'min_max'}
[96m[INFO][00m	 Number of Training Samples: 80000
[96m[INFO][00m	 Number of Validation Samples: 10000
[96m[INFO][00m	 Number of Testing Samples: 10000
Epoch 1/2


INFO:tensorflow:Assets written to: Results/Models/00005/assets


79/79 - 3s - loss: 0.4902 - accuracy: 0.7772 - auc: 0.8499 - true_positives: 30586.0000 - false_positives: 8504.0000 - val_loss: 0.6311 - val_accuracy: 0.6351 - val_auc: 0.8570 - val_true_positives: 1568.0000 - val_false_positives: 166.0000
Epoch 2/2


INFO:tensorflow:Assets written to: Results/Models/00005/assets


79/79 - 3s - loss: 0.4576 - accuracy: 0.7931 - auc: 0.8676 - true_positives: 31260.0000 - false_positives: 7904.0000 - val_loss: 0.6068 - val_accuracy: 0.6547 - val_auc: 0.8619 - val_true_positives: 1761.0000 - val_false_positives: 163.0000


## Dashboard
<img src=figures/dashboard.jpg>

## Results

In [10]:
df = pd.read_csv("Results/results.csv")

In [11]:
df

Unnamed: 0,Trial-ID,Status,Iteration,activation,batch_normalization,dropout,learning_rate,learning_rate_decay,number_of_layers,number_of_nodes,preprocessing,Objective,accuracy,auc,false_positives,true_positives,val_accuracy,val_auc,val_false_positives,val_true_positives
0,1,INTERMEDIATE,0,elu,0,0.809954,0.024632,0.409825,18,173,standardize,1.556066,0.501787,0.501329,18846.0,18898.0,0.4949,0.5,0.0,0.0
1,1,INTERMEDIATE,1,elu,0,0.809954,0.024632,0.409825,18,173,standardize,0.865018,0.497175,0.497726,17633.0,17316.0,0.4949,0.5,0.0,0.0
2,1,COMPLETED,1,elu,0,0.809954,0.024632,0.409825,18,173,standardize,0.865018,0.497175,0.497726,17633.0,17316.0,0.4949,0.5,0.0,0.0
3,2,INTERMEDIATE,0,relu,1,0.29465,0.082378,0.405556,20,240,none,225.727707,0.501275,0.504287,19234.0,19245.0,0.4949,0.5,0.0,0.0
4,2,INTERMEDIATE,1,relu,1,0.29465,0.082378,0.405556,20,240,none,0.693196,0.5041,0.505795,30865.0,31102.0,0.4949,0.5,0.0,0.0
5,2,COMPLETED,1,relu,1,0.29465,0.082378,0.405556,20,240,none,0.693196,0.5041,0.505795,30865.0,31102.0,0.4949,0.5,0.0,0.0
6,3,INTERMEDIATE,0,relu,0,0.416992,0.080434,0.197683,9,111,standardize,0.693095,0.500625,0.500069,10029.0,9988.0,0.5051,0.5,4949.0,5051.0
7,3,INTERMEDIATE,1,relu,0,0.416992,0.080434,0.197683,9,111,standardize,0.693141,0.5,0.498173,11387.0,11296.0,0.5051,0.5,4949.0,5051.0
8,3,COMPLETED,1,relu,0,0.416992,0.080434,0.197683,9,111,standardize,0.693095,0.500625,0.500069,10029.0,9988.0,0.5051,0.5,4949.0,5051.0
9,4,INTERMEDIATE,0,relu,0,0.498603,0.098184,0.108828,5,184,none,0.69325,0.499238,0.498857,20065.0,19913.0,0.4949,0.5,0.0,0.0


## Resources
---
### [SHERPA Docs](https://parameter-sherpa.readthedocs.io/en/latest/)
![](figures/docs.png)


### [SHERPA Paper](https://www.sciencedirect.com/science/article/pii/S2352711020303046)
![](figures/paper.png)


### [SHERPA GitHub](https://github.com/sherpa-ai/sherpa)