# Strategy Serialization with BoFire

## Imports

In [1]:
import json
from pydantic import parse_obj_as, BaseModel
from typing import List, Dict, Optional, Literal


from bofire.data_models.domain.api import Inputs, Outputs, Domain
from bofire.benchmarks.single import Himmelblau
from bofire.benchmarks.multi import DTLZ2
from bofire.data_models.strategies.api import SoboStrategy as SoboStrategyDataModel
from bofire.data_models.strategies.api import QnehviStrategy as QnehviStrategyDataModel
from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
from bofire.data_models.strategies.api import AnyStrategy
from bofire.data_models.acquisition_functions.api import qNEI
import bofire.strategies.api as stategies
from bofire.data_models.surrogates.api import BotorchSurrogates, AnySurrogate, SingleTaskGPSurrogate
from bofire.data_models.kernels.api import ScaleKernel, RBFKernel




  from .autonotebook import tqdm as notebook_tqdm


## Single Objective Problem Setup

In [2]:
# Himmelblau benchmark (imported from `bofire.benchmarks.single`); draw 10 input samples from its domain
benchmark = Himmelblau()
samples = benchmark.domain.inputs.sample(n=10)

# this is the training data: the benchmark function evaluated at the sampled inputs
experiments = benchmark.f(samples, return_complete=True)

# these are the pending candidates: 2 further input samples, passed to `add_candidates` later on
pending_candidates = benchmark.domain.inputs.sample(2)



## Random Strategy

The random strategy and other strategies that just inherit from `Strategy` and not `PredictiveStrategy` are special, as they do not need defined output features in the domain and they do not need a call to `tell` before the `ask`. Furthermore, they only provide input features in the candidates and no predictions for output features.

In [3]:
# set up the data model; the domain is built from the benchmark's input features only
# (no output features are passed)
domain = Domain(input_features=benchmark.domain.input_features)
strategy_data = RandomStrategyDataModel(domain=domain)

# we generate the json spec (a JSON string of the strategy data model)
jspec = strategy_data.json()

# last expression of the cell: display the JSON spec
jspec

'{"type": "RandomStrategy", "domain": {"type": "Domain", "input_features": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "unit": null, "bounds": [-6.0, 6.0]}, {"type": "ContinuousInput", "key": "x_2", "unit": null, "bounds": [-6.0, 6.0]}]}, "output_features": {"type": "Outputs", "features": []}, "constraints": {"type": "Constraints", "constraints": []}}, "seed": 696}'

In [4]:
# deserialize: JSON string -> plain dict -> strategy data model
spec_dict = json.loads(jspec)
strategy_data = parse_obj_as(AnyStrategy, spec_dict)

# turn the data model into a strategy object
strategy = stategies.map(strategy_data)

# request 5 candidates; no `tell` is issued before this `ask`
df_candidates = strategy.ask(candidate_count=5)

# convert the `ask` result into candidate objects and display them
candidates = strategy.to_candidates(df_candidates)

candidates

[Candidate(inputValues={'x_1': InputValue(value=5.788180450670685), 'x_2': InputValue(value=4.007899870247898)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=4.814648686938762), 'x_2': InputValue(value=-0.32807166770742846)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=-5.743313685687972), 'x_2': InputValue(value=3.6680928599086116)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=-2.004589623914832), 'x_2': InputValue(value=-5.152624582882828)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=0.6130215781704162), 'x_2': InputValue(value=-2.068679875423895)}, outputValues=None)]

## SOBO Strategy

This will fail because SOBO is a predictive strategy, which also needs output feature definitions, and these are missing in the domain from before.

In [5]:
# set up the data model, reusing `domain` from the random-strategy cell, which was built
# from input features only; the recorded output of this cell is a ValidationError
# ("no output feature specified") -- the failure is the demonstration here
strategy_data = SoboStrategyDataModel(domain=domain, acquisition_function=qNEI())

# we generate the json spec (not reached when the constructor above raises)
jspec = strategy_data.json()

jspec

ValidationError: 1 validation error for SoboStrategy
domain
  no output feature specified (type=value_error)

Next try with a correct domain:

In [6]:
# set up the data model, this time with the benchmark's own domain
# (instead of the inputs-only `domain` built earlier) and a qNEI acquisition function
strategy_data = SoboStrategyDataModel(domain=benchmark.domain, acquisition_function=qNEI())

# we generate the json spec (a JSON string of the strategy data model)
jspec = strategy_data.json()

# last expression of the cell: display the JSON spec
jspec

'{"type": "SoboStrategy", "domain": {"type": "Domain", "input_features": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "unit": null, "bounds": [-6.0, 6.0]}, {"type": "ContinuousInput", "key": "x_2", "unit": null, "bounds": [-6.0, 6.0]}]}, "output_features": {"type": "Outputs", "features": [{"type": "ContinuousOutput", "key": "y", "objective": {"type": "MinimizeObjective", "w": 1.0, "lower_bound": 0, "upper_bound": 1}, "unit": null}]}, "constraints": {"type": "Constraints", "constraints": []}}, "seed": 3, "num_sobol_samples": 512, "num_restarts": 8, "num_raw_samples": 1024, "descriptor_method": "EXHAUSTIVE", "categorical_method": "EXHAUSTIVE", "discrete_method": "EXHAUSTIVE", "surrogate_specs": {"surrogates": [{"type": "SingleTaskGPSurrogate", "input_features": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "unit": null, "bounds": [-6.0, 6.0]}, {"type": "ContinuousInput", "key": "x_2", "unit": null, "bounds": [-6.0, 6.0]}]}, "output

The following `ask` will fail because SOBO is a predictive strategy, which means we have to provide training data first:

In [7]:
# load it: parse the JSON spec back into a strategy data model
strategy_data = parse_obj_as(AnyStrategy, json.loads(jspec))

# map it: turn the data model into a strategy object
strategy = stategies.map(strategy_data)

# ask it without a prior `tell`; the recorded output of this cell is
# "ValueError: Not enough experiments available to execute the strategy."
df_candidates = strategy.ask(candidate_count=2)

ValueError: Not enough experiments available to execute the strategy.

Training data is provided by using the `tell` method:

In [8]:
# load it: parse the JSON spec back into a strategy data model
strategy_data = parse_obj_as(AnyStrategy, json.loads(jspec))

# map it: turn the data model into a strategy object
strategy = stategies.map(strategy_data)

# tell it the pending candidates if present
strategy.add_candidates(pending_candidates)

# tell it: hand over the training data
strategy.tell(experiments=experiments)

# ask it for 2 candidates
df_candidates = strategy.ask(candidate_count=2)

# transform to spec
# NOTE(review): the recorded output of this cell is "ValueError: missing column y_pred";
# which of the calls in this cell raises it cannot be told from the output -- TODO confirm and fix
candidates = strategy.to_candidates(df_candidates)

candidates

ValueError: missing column y_pred

We can also save the trained models of the strategy; for more information, look at the `model_serial.ipynb` notebook. The `dumps` call may fail here. This should already be fixed in the main branch of the `linear_operator` package; if it is not yet, the fix should be available in main soon.

In [9]:
# JSON spec of the first surrogate, taken from the strategy data model
jsurrogate_spec = strategy_data.surrogate_specs.surrogates[0].json()
# dump of the first surrogate, taken from the mapped strategy (the one that received `tell`)
dump = strategy.surrogate_specs.surrogates[0].dumps()

## MOBO Strategy

As an example of a multiobjective strategy, we use the Qnehvi strategy here. Related strategies are Qparego, MultiplicativeSobo, etc. To use it, we first have to generate a multiobjective domain.

In [20]:
# DTLZ2 benchmark (imported from `bofire.benchmarks.multi`) with dim=6; this rebinds
# `benchmark`, `samples` and `experiments` from the single-objective section
benchmark = DTLZ2(dim=6)
# draw 20 input samples and evaluate the benchmark on them to obtain the training data
samples = benchmark.domain.inputs.sample(n=20)
experiments = benchmark.f(samples, return_complete=True)

Now the strategy spec is set up. Note that we can define exactly which model to use there.

In [21]:
# explicit surrogate for the first output only: a single-task GP with a
# scaled RBF kernel (ard=False)
first_output_gp = SingleTaskGPSurrogate(
    input_features=benchmark.domain.input_features,
    output_features=Outputs(features=[benchmark.domain.outputs[0]]),
    kernel=ScaleKernel(base_kernel=RBFKernel(ard=False)),
)

# strategy data model on the benchmark's domain, using the surrogate defined above
strategy_data = QnehviStrategyDataModel(
    domain=benchmark.domain,
    surrogate_specs=BotorchSurrogates(surrogates=[first_output_gp]),
)

# serialize the strategy data model to a JSON string and display it
jspec = strategy_data.json()

jspec

'{"type": "QnehviStrategy", "domain": {"type": "Domain", "input_features": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_0", "unit": null, "bounds": [0.0, 1.0]}, {"type": "ContinuousInput", "key": "x_1", "unit": null, "bounds": [0.0, 1.0]}, {"type": "ContinuousInput", "key": "x_2", "unit": null, "bounds": [0.0, 1.0]}, {"type": "ContinuousInput", "key": "x_3", "unit": null, "bounds": [0.0, 1.0]}, {"type": "ContinuousInput", "key": "x_4", "unit": null, "bounds": [0.0, 1.0]}, {"type": "ContinuousInput", "key": "x_5", "unit": null, "bounds": [0.0, 1.0]}]}, "output_features": {"type": "Outputs", "features": [{"type": "ContinuousOutput", "key": "f_0", "objective": {"type": "MinimizeObjective", "w": 1.0, "lower_bound": 0, "upper_bound": 1}, "unit": null}, {"type": "ContinuousOutput", "key": "f_1", "objective": {"type": "MinimizeObjective", "w": 1.0, "lower_bound": 0, "upper_bound": 1}, "unit": null}]}, "constraints": {"type": "Constraints", "constraints": []}}, "seed":

Generate the candidates.

In [22]:
# deserialize: JSON string -> plain dict -> strategy data model
spec_dict = json.loads(jspec)
strategy_data = parse_obj_as(AnyStrategy, spec_dict)

# turn the data model into a strategy object and hand over the training data
strategy = stategies.map(strategy_data)
strategy.tell(experiments=experiments)

# request a single candidate
df_candidates = strategy.ask(candidate_count=1)

# convert the `ask` result into candidate objects and display them
candidates = strategy.to_candidates(df_candidates)

candidates

[Candidate(inputValues={'x_0': InputValue(value=1.0), 'x_1': InputValue(value=0.0), 'x_2': InputValue(value=0.0), 'x_3': InputValue(value=0.26778854448283496), 'x_4': InputValue(value=0.3845219911447761), 'x_5': InputValue(value=1.0)}, outputValues={'f_0': OutputValue(predictedValue=0.08972119036798198, standardDeviation=0.21218810974443847, objective=-0.08972119036798198), 'f_1': OutputValue(predictedValue=1.2433746467198457, standardDeviation=0.30556287125819065, objective=-1.2433746467198457)})]

Again the models can be saved. Note that we have two models here as we have two features in `domain.output_features`.

In [23]:
# data-model side: one JSON spec per surrogate of the strategy data model
surrogate_data_models = strategy_data.surrogate_specs.surrogates
jsurrogate_specs = [data_model.json() for data_model in surrogate_data_models]

# strategy side: one dump per surrogate of the mapped strategy
strategy_surrogates = strategy.surrogate_specs.surrogates
dumps = [mapped_surrogate.dumps() for mapped_surrogate in strategy_surrogates]

To fill the model info section accordingly, the following snippet has to be executed for every surrogate:

In [63]:
class CrossValidationValues(BaseModel):
    """Observed and predicted values collected from a cross-validation run.

    Attributes:
        observed: Observed values.
        predicted: Predicted values.
        standardDeviation: Standard deviations reported alongside the
            predictions, or ``None`` when not available.
    """

    observed: List[float]
    predicted: List[float]
    # Explicit default: without it the field is only implicitly optional on
    # pydantic v1 and becomes a required field on pydantic v2.
    standardDeviation: Optional[List[float]] = None

class TestMethod(BaseModel):
    """Base model for a test method, identified by its `type` string."""

    type: str

class CrossValidation(TestMethod):
    """Test method describing a cross-validation with `foldCount` folds.

    `type` is fixed to the literal "CrossValidation".
    """

    type: Literal["CrossValidation"] = "CrossValidation"
    foldCount: int

# Number of cross-validation folds. Defined once so that the reported
# `testMethod` always matches the folds actually used in `cross_validate`.
FOLD_COUNT = 5


def _to_cv_values(cv_results) -> CrossValidationValues:
    """Pack combined cross-validation results into a `CrossValidationValues` model."""
    return CrossValidationValues(
        observed=cv_results.observed.tolist(),
        predicted=cv_results.predicted.tolist(),
        standardDeviation=cv_results.standard_deviation.tolist(),
    )


# first surrogate of the strategy; the snippet has to be repeated for every other surrogate
surrogate = strategy.surrogate_specs.surrogates[0]

# cross-validate on the experiments held by the strategy; the third return value is not used
cv_train, cv_test, _ = surrogate.cross_validate(strategy.experiments, folds=FOLD_COUNT)

testMethod = CrossValidation(foldCount=FOLD_COUNT)

# "mean" row of `describe()` over the metrics: train metrics are requested with
# combine_folds=False, test metrics with combine_folds=True
metricsTrain = cv_train.get_metrics(combine_folds=False).describe().loc["mean"].to_dict()
metricsTest = cv_test.get_metrics(combine_folds=True).describe().loc["mean"].to_dict()

# combine the folds before extracting the observed/predicted values
# (rebinds `cv_train` / `cv_test` to the combined results)
cv_train = cv_train._combine_folds()
cv_test = cv_test._combine_folds()

# results are keyed by the key of the surrogate's first output feature
output_key = surrogate.output_features[0].key
cvResultsTrain = {output_key: _to_cv_values(cv_train)}
cvResultsTest = {output_key: _to_cv_values(cv_test)}

The fields of interest are `cvResultsTrain`, `cvResultsTest`, `metricsTrain`, `metricsTest` and `testMethod`.