# Goal

The goal of this notebook is to check if our variance estimators are biased.

# Method

Generate a sequence of observations from a normal distribution with a known mean and variance. Register them with the optimizer and check how well the predicted variance approximates the true variance.

In [1]:
from IPython.core.display import display, HTML
import warnings
display(HTML("<style>.container { width:100% !important; }</style>"))
warnings.simplefilter("ignore")

In [19]:
from mlos.Optimizers.BayesianOptimizer import BayesianOptimizer
from mlos.Optimizers.BayesianOptimizerConfigStore import bayesian_optimizer_config_store
from mlos.Optimizers.BayesianOptimizerFactory import BayesianOptimizerFactory
from mlos.Optimizers.OptimizationProblem import OptimizationProblem, Objective
from mlos.Spaces import SimpleHypergrid, ContinuousDimension

input_space = SimpleHypergrid(
    name="input",
    dimensions=[
        ContinuousDimension(name="x", min=-10, max=10)
    ]
)

output_space = SimpleHypergrid(
    name="objective",
    dimensions=[
        ContinuousDimension(name="y", min=-100, max=100)
    ]
)

optimization_problem = OptimizationProblem(
    parameter_space=input_space,
    objective_space=output_space,
    objectives=[Objective(name="y", minimize=False)]
)

optimizer_config = bayesian_optimizer_config_store.default
optimizer_config.homogeneous_random_forest_regression_model_config.samples_fraction_per_estimator = 0.5
optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 100

optimizer_factory = BayesianOptimizerFactory()
optimizer = optimizer_factory.create_local_optimizer(
    optimization_problem=optimization_problem
)

11/11/2020 19:25:44 -   BayesianOptimizerFactory -    INFO - [BayesianOptimizerFactory.py:  37 -    create_local_optimizer() ] Optimizer config not specified. Using default.
11/11/2020 19:25:44 -   BayesianOptimizerFactory -    INFO - [BayesianOptimizerFactory.py:  37 -    create_local_optimizer() ] Optimizer config not specified. Using default.


In [23]:
import numpy as np
import pandas as pd

num_observations = 1000

mean = 0.0
standard_deviation = 5.0

parameters_df = pd.DataFrame({'x': np.zeros(num_observations)})
targets_df = pd.DataFrame({'y': np.random.normal(mean, standard_deviation, num_observations)})

In [24]:
optimizer.register(parameters_df, targets_df)

In [25]:
parameters_for_prediction = pd.DataFrame({'x': [0]})
prediction = optimizer.predict(parameters_for_prediction)
prediction.get_dataframe()

Unnamed: 0,is_valid_input,predicted_value,predicted_value_variance,sample_variance,sample_size,predicted_value_degrees_of_freedom
0,True,-0.005183,0.012579,25.157765,10,19990
