# Optimize system with parameters

This is a template notebook for the design of experiments using Bayesian optimization.

Author: {{ cookiecutter.author_name }}
Created: {{ cookiecutter.timestamp }}

In [0]:
# Link to project experiments folder hypothesis_experiment_learnings.board (refresh and hit enter on this line to see the link)

## How to use the notebook

The following cells:
- specify objective and other metrics, the parameter search space, and constraints,
- set up the optimization algorithm,
- read prior results,
- suggest new trials,
- provide the current best guess for optimal parameters.

The optimization itself is carried out with the Ax library; see https://ax.dev/

By default, the notebook is set up to run with an example. To see how it works, run the notebook (multiple times) without changing the code.

For your project, adjust the code in the linked cells with your objectives, variables, dataset etc. and then execute all cells in order.

Please refer to bayesian_optimization.board for detailed instructions.

In [0]:
# <halerium id="f63cd7f1-8fd8-402b-b112-1e58426d8f1b">
# Link to bayesian_optimization.board
# </halerium id="f63cd7f1-8fd8-402b-b112-1e58426d8f1b">


### 1. Imports and general setup

In [0]:
import os

from datetime import datetime

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.cm as cm

from ax.service.ax_client import AxClient
from ax import RangeParameter, ChoiceParameter
from ax.exceptions.core import DataRequiredError, SearchSpaceExhausted
from ax.exceptions.generation_strategy import MaxParallelismReachedException
from ax.core.base_trial import TrialStatus
from ax.modelbridge.generation_strategy import GenerationStrategy, GenerationStep
from ax.modelbridge.registry import Models, ModelRegistryBase

import ax.plot as ax_plot

# Apply matplotlib's "dark_background" style sheet to all figures created after this point.
plt.style.use("dark_background")

### 2. Specify the data path

In [0]:
# Name of the optimization experiment; it is embedded in both output file names below.
experiment_name = 'bayesian_optimization'
# <halerium id="1a7b6868-dc38-410b-b343-3085fa005b2c">
data_dir = "./"           # directory in which the trial data and the best parameters are stored
# </halerium id="1a7b6868-dc38-410b-b343-3085fa005b2c">

# Both CSV files share the stem "<data_dir>/data_<experiment_name>".
_file_stem = os.path.join(data_dir, f"data_{experiment_name}")

data_file_name = f"{_file_stem}_running_trials.csv"
best_parameters_file_name = f"{_file_stem}_best_parameters.csv"

print(f"the trial data will be read from/stored in: {data_file_name}")
print(f"the best parameters will be read from/stored in: {best_parameters_file_name}")

### 3. Specify the metrics

In [0]:
# <halerium id="c1a853f4-9c82-4b74-b6ab-4a93a9981e84">
metrics = ["cost", "quality"]   # names of all metrics recorded for a trial
objective_name = "cost"         # the metric to optimize; it has to be one of `metrics`
minimize = True                 # True: minimize the objective, False: maximize it
# </halerium id="c1a853f4-9c82-4b74-b6ab-4a93a9981e84">

# Fail right away if the objective is not one of the declared metrics.
if not (objective_name in metrics):
    raise ValueError(
        "Objective must be among provided metrics. "
        "Could not find objective_name={} in metrics={}".format(objective_name, metrics)
    )

### 4. Specify the Parameters

In [0]:
# <halerium id="20dc4ade-a3f4-468a-be5c-13bd1f2569ed">
# One entry per tunable parameter: insert the names, types, bounds, etc. of your own parameters.
#   value_type: optional, defaults to inference from the type of "bounds"
#   log_scale:  optional, defaults to False
parameters = [
    dict(name="x1", type="range", bounds=[-1.0, 1.0], value_type="float", log_scale=False),
    dict(name="x2", type="range", bounds=[-1.0, 1.0], value_type="float", log_scale=False),
    dict(name="x3", type="range", bounds=[-1.0, 1.0], value_type="float", log_scale=False),
]
# </halerium id="20dc4ade-a3f4-468a-be5c-13bd1f2569ed">


### 5. Specify the Constraints

In [0]:
# Constraints are given as lists of inequality strings.
# NOTE: with the example bounds of [-1.0, 1.0] for x1 and x2, the example parameter constraint
# below is always satisfied; replace it with a constraint that matters for your system.
# <halerium id="f02ba1a1-150c-458b-8422-af735fab8595">
parameter_constraints = ["x1 + x2 <= 10"]      # inequalities involving the parameters defined above
outcome_constraints = ["quality >= 1"]         # inequalities involving the metrics defined above
# </halerium id="f02ba1a1-150c-458b-8422-af735fab8595">


### 6. Specify the Schedule

In [0]:
# <halerium id="cd470dcc-ec35-4ae6-8fb6-fbf8f98d98a5">
suggest_new_trials = True         # set to `True` if you want new trials to be suggested,
# </halerium id="cd470dcc-ec35-4ae6-8fb6-fbf8f98d98a5">
                                  # set to `False` if you only want to use existing results to estimate the best parameters

# <halerium id="361f7d6d-a7f9-4ecd-8fc0-5199285ee90a">
max_batch_size = 10               # maximum number of trials in a batch
always_max_batch_size = True      # whether to force the full batch size for newly suggested trials
suggest_when_outstanding = False  # whether to suggest new trials while earlier trials are still outstanding

initial_n_trials = 5              # number of initial trials before the Bayesian optimization steps start
# </halerium id="361f7d6d-a7f9-4ecd-8fc0-5199285ee90a">


### 7. Run the Trials

#### Complete outstanding trials

Note that the following cell invents results for the trials stored in the data file (including any outstanding ones) for demonstration purposes.

For real applications, either
 - replace the cell with appropriate code for retrieving the actual trial results, or 
 - remove the cell entirely, if you intend to add the trial results to the data files in a different way.

In [0]:
# <halerium id="a5ba624b-73d5-409a-b355-d09c60649574">
# Demonstration only: compute synthetic results from the parameter values of every
# trial in the data file and write them back to the same file.
if os.path.exists(data_file_name):
    data = pd.read_csv(data_file_name, index_col="index").assign(
        # cost: sum of the squared parameter values
        cost_mean=lambda trials: trials["x1"]**2 + trials["x2"]**2 + trials["x3"]**2,
        cost_SEM=0.1,
        # quality: sigmoid-shaped function of x3, scaled by 2 and shifted by 0.2
        quality_mean=lambda trials: 2./(1 + np.exp(-trials["x3"] + 0.2)),
        quality_SEM=0.01,
    )

    display(data)

    data.to_csv(data_file_name)
# </halerium id="a5ba624b-73d5-409a-b355-d09c60649574">


#### Read any existing data

In [0]:
from functions.bayesian_optimization import read_existing_data

# Unpack the seven values returned by the project helper; later cells refer to them by name.
(
    n_trials,
    n_completed_trials,
    n_outstanding_trials,
    prior_data,
    parameter_columns,
    result_columns,
    data_columns,
) = read_existing_data(parameters, metrics, data_file_name)

#### Set up client

In [0]:
from functions.bayesian_optimization import set_up_client

# All arguments are passed positionally, so their order must match the helper's signature.
generation_strategy, ax_client = set_up_client(
    experiment_name,
    parameters,
    objective_name,
    minimize,
    parameter_constraints,
    outcome_constraints,
    max_batch_size,
    always_max_batch_size,
    n_trials,
    initial_n_trials,
)

#### Feed existing data to client

In [0]:
from functions.bayesian_optimization import feed_data_to_client

# The returned value is kept as `prior_trials` and handed to the trial-suggestion cell.
prior_trials = feed_data_to_client(
    prior_data,
    parameter_columns,
    result_columns,
    ax_client,
    metrics,
)

#### Suggest new trials

In [0]:
# Import the helper under an alias. Importing it as `suggest_new_trials` would rebind the
# notebook-level boolean of the same name (set in the schedule cell) to the function
# object, so the flag passed below would always be truthy and a value of `False` would
# be silently ignored.
from functions.bayesian_optimization import suggest_new_trials as suggest_new_trials_helper

# All arguments are passed positionally, so their order must match the helper's signature.
suggest_new_trials_helper(
    experiment_name,
    n_outstanding_trials,
    suggest_new_trials,  # the boolean switch from the schedule cell, not the helper function
    suggest_when_outstanding,
    max_batch_size,
    always_max_batch_size,
    ax_client,
    result_columns,
    data_columns,
    prior_trials,
    data_file_name,
    data_dir,
)

#### Estimate best parameters

In [0]:
from functions.bayesian_optimization import estimate_best_params

# The returned value is kept as `best_parameters_data` and passed on to `show_results`.
best_parameters_data = estimate_best_params(
    ax_client,
    best_parameters_file_name,
    data_dir,
    experiment_name,
    parameter_columns,
    metrics,
    n_completed_trials,
)

### 8. Get the Final Results

In [0]:
from functions.bayesian_optimization import show_results

# <halerium id="cd470dcc-ec35-4ae6-8fb6-fbf8f98d98a5">
show_results(
    ax_client,
    minimize,
    objective_name,
    best_parameters_data,
)
# </halerium id="cd470dcc-ec35-4ae6-8fb6-fbf8f98d98a5">
