In [1]:
from IPython.core.display import display, HTML
import math
import pandas as pd

import mlos.global_values as global_values
from mlos.Optimizers.BayesianOptimizerFactory import BayesianOptimizerFactory
from mlos.Optimizers.BayesianOptimizerConfigStore import bayesian_optimizer_config_store
from mlos.Optimizers.OptimizationProblem import OptimizationProblem, Objective
from mlos.Spaces import ContinuousDimension, Point, SimpleHypergrid
from mlos.Tracer import Tracer

# Stretch the notebook container to the full width of the browser window.
#
full_width_style = HTML("<style>.container { width:100% !important; }</style>")
display(full_width_style)

# Declare the singletons held in mlos.global_values.
#
global_values.declare_singletons()
# Optional: uncomment to install a tracer.
# global_values.tracer = Tracer(actor_id="OptimizerEvaluationTools", thread_id=0)

# Factory used throughout the notebook to create local optimizers.
#
optimizer_factory = BayesianOptimizerFactory()

In [2]:
# Build the meta optimizer: its parameter space is the space of bayesian optimizer configs,
# and its single objective (to be minimized) is named by meta_objective_name.
#
meta_objective_name = "optimum_value_after_100_iterations"

# Start from the stored "default_with_glow_worm" config and override a few settings.
#
meta_optimizer_config = bayesian_optimizer_config_store.get_config_by_name("default_with_glow_worm")

forest_config = meta_optimizer_config.homogeneous_random_forest_regression_model_config
forest_config.n_estimators = 100
forest_config.decision_tree_regression_model_config.n_new_samples_before_refit = 1
meta_optimizer_config.experiment_designer_config.fraction_random_suggestions = 0.2

# The objective is an unbounded continuous value.
#
meta_objective_space = SimpleHypergrid(
    name="predictions",
    dimensions=[
        ContinuousDimension(name=meta_objective_name, min=-math.inf, max=math.inf)
    ]
)

meta_optimization_problem = OptimizationProblem(
    parameter_space=bayesian_optimizer_config_store.parameter_space,
    objective_space=meta_objective_space,
    objectives=[Objective(name=meta_objective_name, minimize=True)]
)

meta_optimizer = optimizer_factory.create_local_optimizer(
    optimizer_config=meta_optimizer_config,
    optimization_problem=meta_optimization_problem
)

09/29/2020 18:55:57 -   BayesianOptimizerFactory -    INFO - [BayesianOptimizerFactory.py:  40 -    create_local_optimizer() ] Creating a bayesian optimizer with config: {
  "surrogate_model_implementation": "HomogeneousRandomForestRegressionModel",
  "experiment_designer_implementation": "ExperimentDesigner",
  "min_samples_required_for_guided_design_of_experiments": 10,
  "homogeneous_random_forest_regression_model_config.n_estimators": 100,
  "homogeneous_random_forest_regression_model_config.features_fraction_per_estimator": 1,
  "homogeneous_random_forest_regression_model_config.samples_fraction_per_estimator": 1,
  "homogeneous_random_forest_regression_model_config.regressor_implementation": "DecisionTreeRegressionModel",
  "homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.criterion": "mse",
  "homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter": "best",
  "homogeneous_random_forest_regression_mo

In [3]:
from mlos.OptimizerEvaluationTools.ObjectiveFunctionFactory import ObjectiveFunctionFactory
from mlos.OptimizerEvaluationTools.ObjectiveFunctionConfigStore import objective_function_config_store
# Look up the 'three_level_quadratic' config and build the objective function from it.
#
objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

# Show the config as indented JSON.
#
objective_function_config_json = objective_function_config.to_json(indent=2)
print(objective_function_config_json)

{
  "implementation": "ThreeLevelQuadratic"
}


In [4]:
# Print the objective function's parameter space; the optimization loop passes this
# same space to every inner optimizer as its parameter space.
print(objective_function.parameter_space)

  Name: three_level_quadratic_config
  Dimensions:
    vertex_height: {low, 5, 15}

  IF vertex_height IN {low} THEN (
    Name: low_quadratic_params
    Dimensions:
      x_1: [-100.00, 100.00]
      x_2: [-100.00, 100.00]
  )

  IF vertex_height IN {5} THEN (
    Name: medium_quadratic_params
    Dimensions:
      x_1: [-100.00, 100.00]
      x_2: [-100.00, 100.00]
  )

  IF vertex_height IN {15} THEN (
    Name: high_quadratic_params
    Dimensions:
      x_1: [-100.00, 100.00]
      x_2: [-100.00, 100.00]
  )


In [5]:
from mlos.Logger import create_logger
from mlos.Optimizers.RegressionModels.RegressionModelFitState import RegressionModelFitState

# History of the meta optimizer's optima, appended to by the optimization loop.
# Each line declares three parallel lists: iteration number, optimum config, optimum value.
#
best_observation_num_observations, best_observation_configs, best_observations = [], [], []
predicted_value_num_observations, best_predicted_value_configs, best_predicted_values = [], [], []

# Receives the meta optimizer's surrogate model goodness-of-fit metrics.
#
regression_model_fit_state = RegressionModelFitState()

In [6]:
# Outer-loop iteration index that the optimization cell reads as its starting point.
#
i = 0

# Logger for progress and error messages.
#
logger = create_logger("Optimizer evaluation.")

In [None]:
from mlos.Optimizers.OptimumDefinition import OptimumDefinition
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import GoodnessOfFitMetrics, DataSetType



# Meta-optimization loop.
#
# Each outer iteration:
#   1. asks the meta optimizer for a bayesian optimizer config,
#   2. builds an inner optimizer from that config and runs it against the objective function,
#   3. registers the inner optimizer's best observed 'y' with the meta optimizer,
#   4. records the meta optimizer's current optima and goodness-of-fit metrics.
#
# `i` holds the index of the next outer iteration to run. It is advanced after every completed
# outer iteration, so re-running this cell continues where the previous run stopped.
#
num_outer_iterations = 20

# NOTE: the meta objective is named 'optimum_value_after_100_iterations'; keep this at 100
# unless the meta optimizer's objective space is renamed accordingly.
num_inner_iterations = 100

start_iteration_num = i
end_iteration_num = start_iteration_num + num_outer_iterations

for outer_loop_iteration in range(start_iteration_num, end_iteration_num):
    inner_optimizer_config = meta_optimizer.suggest()

    inner_optimizer = optimizer_factory.create_local_optimizer(
        optimizer_config=inner_optimizer_config,
        optimization_problem=OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)]
        )
    )

    ############################################# INNER OPTIMIZATION #############################################

    try:
        # A dedicated loop variable is used here so that the notebook-level counter `i` is not clobbered.
        for inner_iteration in range(num_inner_iterations):
            parameters = inner_optimizer.suggest()
            objectives = objective_function.evaluate_point(parameters)
            logger.info(f"[{inner_iteration+1}/{num_inner_iterations}]Parameters: {parameters}, objectives: {objectives}")
            inner_optimizer.register(parameters.to_dataframe(), objectives.to_dataframe())
        inner_best_config, inner_best_observation = inner_optimizer.optimum(OptimumDefinition.BEST_OBSERVATION)

        logger.info(f"[{outer_loop_iteration+1}/{end_iteration_num}] {inner_best_observation}")
        meta_optimizer.register(
            inner_optimizer_config.to_dataframe(),
            Point(optimum_value_after_100_iterations=inner_best_observation.y).to_dataframe()
        )

    except Exception:
        # Best effort: a failing inner optimization is logged and the meta loop moves on.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt stop the cell.
        logger.error("Failed to complete inner optimization.", exc_info=True)

    ############################################# INNER OPTIMIZATION #############################################

    if meta_optimizer.trained:
        gof_metrics = meta_optimizer.compute_surrogate_model_goodness_of_fit()
        regression_model_fit_state.set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)

    # Query the optimum first and append to all three lists afterwards, so the parallel
    # lists cannot end up with different lengths if the query raises.
    best_observation_config, best_observation = meta_optimizer.optimum(OptimumDefinition.BEST_OBSERVATION)
    best_observation_num_observations.append(outer_loop_iteration)
    best_observation_configs.append(best_observation_config)
    best_observations.append(best_observation)

    try:
        best_predicted_value_config, best_predicted_value = meta_optimizer.optimum(OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)
    except Exception as e:
        # Still best effort, but no longer silent: report why the predicted optimum was skipped.
        logger.info(f"Predicted-value optimum not recorded for iteration {outer_loop_iteration}: {e}")
    else:
        best_predicted_value_configs.append(best_predicted_value_config)
        best_predicted_values.append(best_predicted_value)
        predicted_value_num_observations.append(outer_loop_iteration)

    # Advance the resume point only after the whole outer iteration has completed.
    i = outer_loop_iteration + 1
    

09/29/2020 18:55:57 -   BayesianOptimizerFactory -    INFO - [BayesianOptimizerFactory.py:  40 -    create_local_optimizer() ] Creating a bayesian optimizer with config: {
  "surrogate_model_implementation": "HomogeneousRandomForestRegressionModel",
  "experiment_designer_implementation": "ExperimentDesigner",
  "min_samples_required_for_guided_design_of_experiments": 56,
  "homogeneous_random_forest_regression_model_config.n_estimators": 126,
  "homogeneous_random_forest_regression_model_config.features_fraction_per_estimator": 0.4507923548206544,
  "homogeneous_random_forest_regression_model_config.samples_fraction_per_estimator": 0.3273192317976761,
  "homogeneous_random_forest_regression_model_config.regressor_implementation": "DecisionTreeRegressionModel",
  "homogeneous_random_forest_regression_model_config.bootstrap": 0,
  "homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.criterion": "mae",
  "homogeneous_random_forest_regression_model_conf

09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-input-7-9a4145fa58c9>:  27 -                  <module>() ] [19/100]Parameters: {"vertex_height": 5, "medium_quadratic_params.x_1": 69.88838830949854, "medium_quadratic_params.x_2": 25.844485090875068}, objectives: {"y": 5557.324230111716}
09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-input-7-9a4145fa58c9>:  27 -                  <module>() ] [20/100]Parameters: {"vertex_height": 15, "high_quadratic_params.x_1": 76.47403632461328, "high_quadratic_params.x_2": -85.81013693467978}, objectives: {"y": 13226.657832526766}
09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-input-7-9a4145fa58c9>:  27 -                  <module>() ] [21/100]Parameters: {"vertex_height": "low", "low_quadratic_params.x_1": -76.04097346406269, "low_quadratic_params.x_2": 74.70387856739299}, objectives: {"y": 11362.899118374084}
09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-in

09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-input-7-9a4145fa58c9>:  27 -                  <module>() ] [46/100]Parameters: {"vertex_height": 15, "high_quadratic_params.x_1": -42.540460241159714, "high_quadratic_params.x_2": 72.23111864315422}, objectives: {"y": 7042.0252579711105}
09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-input-7-9a4145fa58c9>:  27 -                  <module>() ] [47/100]Parameters: {"vertex_height": 15, "high_quadratic_params.x_1": -10.138010334382173, "high_quadratic_params.x_2": 57.00765712217046}, objectives: {"y": 3367.652224098992}
09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-input-7-9a4145fa58c9>:  27 -                  <module>() ] [48/100]Parameters: {"vertex_height": 5, "medium_quadratic_params.x_1": -49.88941551388662, "medium_quadratic_params.x_2": -27.27541634964024}, objectives: {"y": 3237.902117363453}
09/29/2020 18:55:57 -      Optimizer evaluation. -    INFO - [<ipython-i

In [None]:
# Display the meta optimizer's best observed config and value, as assigned by the
# optimization loop on its most recent iteration.
best_observation_config, best_observation

In [None]:
# Display the meta optimizer's best predicted config and value, from the most recent
# loop iteration in which the predicted-value optimum could be computed.
best_predicted_value_config, best_predicted_value

In [None]:
# Best observation dataframe
#
# One row per recorded iteration, then reduced to the first and the last row of every distinct
# optimum value. A value that was recorded only once contributes the same row twice.
#
observed_value_column = 'optimum_value_after_100_iterations'

all_best_observations_df = pd.DataFrame(
    [observation.to_dict() for observation in best_observations]
).assign(num_observations=best_observation_num_observations)

last_row_per_value = all_best_observations_df.drop_duplicates(subset=[observed_value_column], keep='last')
first_row_per_value = all_best_observations_df.drop_duplicates(subset=[observed_value_column], keep='first')

best_observation_df = pd.concat([last_row_per_value, first_row_per_value]).sort_index()

In [None]:
# Best predicted value dataframe
#
# One row per recorded iteration, then reduced to the first and the last row of every distinct
# predicted value. A value that was recorded only once contributes the same row twice.
#
predicted_value_column = 'predicted_value'

all_best_predicted_values_df = pd.DataFrame(
    [predicted_value.to_dict() for predicted_value in best_predicted_values]
).assign(num_observations=predicted_value_num_observations)

last_row_per_prediction = all_best_predicted_values_df.drop_duplicates(subset=[predicted_value_column], keep='last')
first_row_per_prediction = all_best_predicted_values_df.drop_duplicates(subset=[predicted_value_column], keep='first')

best_predicted_value_df = pd.concat([last_row_per_prediction, first_row_per_prediction]).sort_index()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
%matplotlib inline

# Plot the meta optimizer's best observed value and best predicted value
# against the outer-loop iteration number at which each was recorded.
#
fig, ax = plt.subplots(1, figsize=(11, 20), dpi=80, sharex=True)

ax.plot(
    best_observation_df['num_observations'],
    best_observation_df['optimum_value_after_100_iterations'],
    label='optimum_value_after_100_iterations'
)
ax.plot(
    best_predicted_value_df['num_observations'],
    best_predicted_value_df['predicted_value'],
    label='predicted_value'
)

ax.set_xlabel('num_observations')
ax.set_ylabel('y')
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
# Label every second recorded row to keep the x axis readable.
ax.set_xticks(best_observation_df['num_observations'][::2])
ax.grid(True)
ax.legend()

# plt.show() instead of fig.show(): with the inline (non-GUI) backend selected in this cell,
# Figure.show() only emits a "cannot show the figure" warning.
plt.show()

In [None]:
from mlos.Optimizers.RegressionModels.GoodnessOfFitMetrics import DataSetType

# Let's take a look at goodness of fit data.
#
# The optimization loop stores metrics in regression_model_fit_state only under DataSetType.TRAIN,
# so that is the data set type requested here.
#
goodness_of_fit_dataframe = regression_model_fit_state.get_goodness_of_fit_dataframe(data_set_type=DataSetType.TRAIN) # TODO: add support to evaluate GoF on test data

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
%matplotlib inline

# Plot every goodness-of-fit column against 'last_refit_iteration_number', one subplot per column.
#
gof_df = goodness_of_fit_dataframe

# These columns are not plotted as metrics; 'last_refit_iteration_number' serves as the x axis.
excluded_columns = ('observation_count', 'prediction_count', 'last_refit_iteration_number')
columns_to_plot = [name for name in gof_df.columns.values if name not in excluded_columns]
num_plots = len(columns_to_plot)

# squeeze=False always returns a 2-D array of Axes, so indexing also works when there is a single plot.
fig, axs = plt.subplots(num_plots, figsize=(11, 20), dpi=80, sharex=True, squeeze=False)
metric_axes = axs[:, 0]

# Iterate over (axes, column) pairs instead of an index named `i`, which would overwrite
# the iteration counter `i` that the optimization cell reads when it is re-run.
for metric_ax, column in zip(metric_axes, columns_to_plot):
    metric_ax.plot(gof_df['last_refit_iteration_number'], gof_df[column], marker='o', label=column)
    metric_ax.set_ylabel(column)
    metric_ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))
    metric_ax.set_xticks(gof_df['last_refit_iteration_number'])
    metric_ax.grid(True)

# The x axis is shared, so only the bottom subplot gets the label.
metric_axes[-1].set_xlabel('last_refit_iteration_number')

# plt.show() instead of fig.show(): with the inline (non-GUI) backend selected in this cell,
# Figure.show() only emits a "cannot show the figure" warning.
plt.show()