# Optimize system with parameters

This is a template notebook for classical design of experiments for optimization.

## How to use the notebook

The following cells:
- specify objective and other metrics, the parameter search space, and constraints,
- set up the optimization algorithm,
- read trial results,
- provide the current best guess for optimal parameters.

To just see how it works for a toy example,
  1. run an unaltered version of the notebook "classical_doe.ipynb",
  2. run this notebook as is.

For your own project, adjust the details about objectives, parameters, etc. and then execute all cells in order.
Make sure that the details about the project, objective, etc. specified here match those in the notebook "classical_doe.ipynb".

## Imports and general setup

In [0]:
import os

from datetime import datetime

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.cm as cm

from ax.service.ax_client import AxClient
from ax import RangeParameter, ChoiceParameter
from ax.exceptions.core import DataRequiredError, SearchSpaceExhausted
from ax.exceptions.generation_strategy import MaxParallelismReachedException
from ax.core.base_trial import TrialStatus
from ax.modelbridge.generation_strategy import GenerationStrategy, GenerationStep
from ax.modelbridge.registry import Models, ModelRegistryBase

import ax.plot as ax_plot

# Select matplotlib's built-in "dark_background" style sheet for plots.
plt.style.use("dark_background")


## Project

In [0]:
# --- user settings -----------------------------------------------------------
# experiment_name: label of the optimization experiment; it is embedded in the
#                  names of the data files below.
# data_dir:        directory in which the trial data files are looked up.
experiment_name = "test"
data_dir = "./"

# CSV file holding the running trials of this experiment.
data_file_name = os.path.join(data_dir, f"data_{experiment_name}_running_trials.csv")

# Without trial data there is nothing to evaluate, so stop right away.
if not os.path.exists(data_file_name):
    print(f"file for trial data not found: {data_file_name}")
    raise RuntimeError("No trial data found.")
print(f"the trial data will be read from: {data_file_name}")

# CSV file holding the history of best-parameter estimates.
best_parameters_file_name = os.path.join(data_dir, f"data_{experiment_name}_best_parameters.csv")
print(f"the best parameters will be read from/stored in: {best_parameters_file_name}")


## Metrics and objective

In [0]:
# --- user settings -----------------------------------------------------------
# metrics:        names of all metrics recorded for each trial.
# objective_name: the metric to optimize; it has to be one of `metrics`.
# minimize:       True to minimize the objective, False to maximize it.
metrics = ["cost", "quality"]
objective_name = "cost"
minimize = True

# Fail early if the objective is not one of the declared metrics.
if metrics.count(objective_name) == 0:
    raise ValueError(
        "Objective must be among provided metrics. "
        f"Could not find objective_name={objective_name} in metrics={metrics}"
    )


## Parameters

In [0]:
# Search space: one entry per parameter to try.
#   name   -- the name of the parameter
#   type   -- "range" for continuous parameters, "choice" for discrete ones
#   bounds -- [lower, upper] bound as a list (range parameters only)
parameters = [
    dict(name="x1", type="range", bounds=[0., 1.]),
    dict(name="x2", type="range", bounds=[0., 10.]),
    dict(name="x3", type="range", bounds=[-5., 5.]),
    # Example of a discrete parameter (disabled):
    #   values     -- the values to try for the parameter
    #   is_ordered -- whether the values are ordered
    # dict(name="x4", type="choice", values=["up", "down", "strange"], is_ordered=False),
]

## Constraints

In [0]:
# Constraints on the metrics, written as inequality strings.
outcome_constraints = ["quality >= 1"]
# Constraints on the parameters, written as inequalities (none by default).
parameter_constraints = []


## Best parameter estimation

### Complete outstanding trials

Note that the following cell contains code to invent trial results for demonstration purposes. 

For real applications, either
 - replace the cell with appropriate code for retrieving the actual trial results, or 
 - remove the cell entirely, if you intend to add the trial results to the data files in a different way.

In [0]:
# Demo only: invent results for every trial in the data file and write them back.
data = pd.read_csv(data_file_name, index_col="index")

# Cost contribution per value of the optional discrete parameter "x4"
# (only referenced by the disabled term noted below).
cost_for = {"up": 1, "down": 2, "strange": 3}

data = data.assign(
    # Toy cost built from squared distances of x1, x2, x3 to fixed values.
    # Optional extra term for "x4":  + frame["x4"].map(lambda x: cost_for.get(x, 4))
    cost_mean=lambda frame: (frame["x1"] - 0.6)**2 + 0.1 * (frame["x2"] - 7.)**2  + 0.3 *(frame["x3"] - 2.)**2,
    cost_SEM=0.,
    # Toy quality: a logistic curve in x2.
    quality_mean=lambda frame: 2./(1 + np.exp(-frame["x2"] + 2)),
    quality_SEM=0.,
)

display(data)

# Overwrite the trial data file with the invented results.
data.to_csv(data_file_name)


### Read trial data

In [0]:
# Column layout expected in the trial data file: one column per parameter,
# followed by a "<metric>_mean" and a "<metric>_SEM" column per metric.
parameter_columns = [parameter["name"] for parameter in parameters]
result_columns = [f"{metric}{suffix}" for metric in metrics for suffix in ("_mean", "_SEM")]
data_columns = [*parameter_columns, *result_columns]

# Defaults used when there is no data file.
n_trials = n_completed_trials = n_outstanding_trials = 0
prior_data = None

if not os.path.exists(data_file_name):
    print("no prior data.")
else:
    print(f"reading prior data from {data_file_name}...")
    prior_data = pd.read_csv(data_file_name, index_col="index")

    # Every expected column has to be present in the file.
    missing_colums = set(data_columns).difference(prior_data.columns)
    if len(missing_colums) > 0:
        raise ValueError(f"data file missing colums: {missing_colums}.")
    prior_data = prior_data[data_columns]

    # A trial counts once all of its parameters are given; it is completed
    # once all of its result columns are filled in as well.
    n_trials = len(prior_data.dropna(subset=parameter_columns))
    n_completed_trials = len(prior_data.dropna())
    n_outstanding_trials = n_trials - n_completed_trials

### Set up client

In [0]:
# Bayesian optimization step (requires data obtained from previous trials and
# learns from all data available at the time of each new candidate generation call).
bayesian_optimization_step = GenerationStep(
    model=Models.GPEI,
    num_trials=-1,      # no limit on how many trials this step may produce
    max_parallelism=3,  # parallelism limit for this step
)
generation_strategy = GenerationStrategy([bayesian_optimization_step])

ax_client = AxClient(generation_strategy=generation_strategy)

# Register the experiment using the settings from the cells above.
ax_client.create_experiment(
    name=experiment_name,
    parameters=parameters,
    parameter_constraints=parameter_constraints,
    objective_name=objective_name,
    minimize=minimize,
    outcome_constraints=outcome_constraints,
)


### Feed data to client

In [0]:
# Replay the stored trials into the client. Rows with missing parameter values
# are skipped; rows with missing results are attached but left uncompleted.
prior_trials = {}
stored_rows = prior_data.iterrows() if prior_data is not None else iter(())
for index, trial_data in stored_rows:

    parameter_values = trial_data[parameter_columns]
    parameter_is_missing = parameter_values.isna()
    if parameter_is_missing.any():
        missing_trial_parameters = ", ".join(parameter_values[parameter_is_missing].index)
        print(f"row {index}: missing parameter values for: {missing_trial_parameters}.")
        continue

    # attach_trial hands back the parameterization together with the trial index.
    trial_parameters, trial_index = ax_client.attach_trial(parameters=parameter_values.to_dict())

    trial_results = trial_data[result_columns]
    result_is_missing = trial_results.isna()
    if result_is_missing.any():
        missing_results = ", ".join(trial_results[result_is_missing].index)
        print(f"row {index}: outstanding results for: {missing_results}.")
    else:
        # One (mean, SEM) pair per metric.
        raw_data = {
            metric: (trial_results[metric + "_mean"], trial_results[metric + "_SEM"])
            for metric in metrics
        }
        ax_client.complete_trial(trial_index=trial_index, raw_data=raw_data)

    prior_trials[trial_index] = {**trial_parameters, **trial_results.to_dict()}


### Estimate best parameters

In [0]:
# Request a candidate from the client before asking for the best parameters.
# NOTE(review): presumably this forces the model to be fitted on the attached
# data so the estimate below can use model predictions -- confirm.
# Candidate generation can fail (e.g. no completed trials yet, or too many
# trials still outstanding); report that instead of aborting the cell so the
# best-parameter history is still shown.
try:
    ax_client.get_next_trial()
except (DataRequiredError, MaxParallelismReachedException, SearchSpaceExhausted) as error:
    print(f"could not generate a new candidate trial: {error!r}")

# Columns of the best-parameter history table.
best_parameters_columns = ["n_completed_trials"] + parameter_columns + metrics

# Load the history of earlier estimates, if there is one.
if os.path.exists(best_parameters_file_name):
    prior_best_parameters_data = pd.read_csv(best_parameters_file_name)
else:
    prior_best_parameters_data = pd.DataFrame(columns=best_parameters_columns)


best_parameters_result = ax_client.get_best_parameters()
if best_parameters_result is None:
    best_parameters = None
    means = None
    covariances = None
    new_best_parameters_data = pd.DataFrame(columns=best_parameters_columns)
else:
    # The model predictions may be None even when best parameters are
    # returned; unpack them defensively.
    best_parameters, model_predictions = best_parameters_result
    if model_predictions is None:
        means, covariances = None, None
    else:
        means, covariances = model_predictions
    new_best_parameters_data = pd.DataFrame.from_records([{
        "n_completed_trials": n_completed_trials,
        **best_parameters, **(means or {}),
    }])


# DataFrame.append was removed in pandas 2.0; use pd.concat instead. Empty
# frames are left out so they do not influence the resulting dtypes.
non_empty_frames = [
    frame
    for frame in (prior_best_parameters_data, new_best_parameters_data)
    if not frame.empty
]
if non_empty_frames:
    # ignore_index gives the history a clean 0..n-1 index for display.
    best_parameters_data = pd.concat(non_empty_frames, ignore_index=True)
else:
    best_parameters_data = prior_best_parameters_data.copy()

# Keep the previous history file as a timestamped backup before overwriting it.
if os.path.exists(best_parameters_file_name):
    dt = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    os.rename(best_parameters_file_name, os.path.join(data_dir,  f"data_{experiment_name}_best_parameters_{dt}.csv"))
best_parameters_data.to_csv(best_parameters_file_name, index=False)

if len(best_parameters_data) > 0:
    print("\nbest parameters so far (from oldest to most recent):")
    display(best_parameters_data)
else:
    print("no best parameters yet.")

