# Strategy Serialization with BoFire

## Imports

In [1]:
import json
from typing import Literal

from pydantic import BaseModel, parse_obj_as

import bofire.strategies.api as stategies
from bofire.benchmarks.multi import DTLZ2
from bofire.benchmarks.single import Himmelblau
from bofire.data_models.acquisition_functions.api import qNEI
from bofire.data_models.domain.api import Domain, Outputs
from bofire.data_models.kernels.api import RBFKernel, ScaleKernel
from bofire.data_models.strategies.api import AnyStrategy
from bofire.data_models.strategies.api import QnehviStrategy as QnehviStrategyDataModel
from bofire.data_models.strategies.api import RandomStrategy as RandomStrategyDataModel
from bofire.data_models.strategies.api import SoboStrategy as SoboStrategyDataModel
from bofire.data_models.surrogates.api import BotorchSurrogates, SingleTaskGPSurrogate
from bofire.surrogates.diagnostics import CvResults2CrossValidationValues
from bofire.surrogates.trainable import TrainableSurrogate




  from .autonotebook import tqdm as notebook_tqdm


## Single Objective Problem Setup

In [2]:
# Single-objective benchmark; its domain is sampled below and its function
# `f` is used to evaluate the samples.
benchmark = Himmelblau()
samples = benchmark.domain.inputs.sample(n=10)

# Training data: the sampled inputs evaluated with the benchmark function.
# NOTE(review): `return_complete=True` presumably returns inputs together with
# the outputs -- confirm against the benchmark API.
experiments = benchmark.f(samples, return_complete=True)

# Pending candidates: a second, separate sample of size 2 from the inputs.
pending_candidates = benchmark.domain.inputs.sample(2)



## Random Strategy

The random strategy and other strategies that inherit only from `Strategy` and not from `PredictiveStrategy` are special: they do not need defined output features in the domain, and they do not need a call to `tell` before the `ask`. Furthermore, they only provide input features in the candidates and no predictions for output features.

In [3]:
# Set up the data model. The domain is built from the benchmark's inputs only;
# no outputs are passed to it.
domain = Domain(inputs=benchmark.domain.inputs)
strategy_data = RandomStrategyDataModel(domain=domain)

# Serialize the data model to its JSON spec (a string).
jspec = strategy_data.json()

jspec

'{"type": "RandomStrategy", "domain": {"type": "Domain", "inputs": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "unit": null, "bounds": [-6.0, 6.0], "stepsize": null}, {"type": "ContinuousInput", "key": "x_2", "unit": null, "bounds": [-6.0, 6.0], "stepsize": null}]}, "outputs": {"type": "Outputs", "features": []}, "constraints": {"type": "Constraints", "constraints": []}}, "seed": null}'

In [4]:
# JSON string -> plain dict -> validated strategy data model
spec_dict = json.loads(jspec)
strategy_data = parse_obj_as(AnyStrategy, spec_dict)

# data model -> strategy object
strategy = stategies.map(strategy_data)

# Request five candidates (no `tell` happens in this cell) and convert the
# returned frame to the candidate spec.
df_candidates = strategy.ask(candidate_count=5)
candidates = strategy.to_candidates(df_candidates)

candidates

[Candidate(inputValues={'x_1': InputValue(value=-3.2541032989181087), 'x_2': InputValue(value=2.479062598028874)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=4.8507730485032035), 'x_2': InputValue(value=-3.158785806677552)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=-3.591644121200164), 'x_2': InputValue(value=1.7947586005877065)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=-3.143457681284705), 'x_2': InputValue(value=-0.7094897378751179)}, outputValues=None),
 Candidate(inputValues={'x_1': InputValue(value=-0.14477162046763858), 'x_2': InputValue(value=-4.116229070856468)}, outputValues=None)]

## SOBO Strategy

Set up the strategy's data model.

In [5]:
# Describe the strategy: SOBO on the benchmark's domain, with qNEI as the
# acquisition function.
strategy_data = SoboStrategyDataModel(
    domain=benchmark.domain,
    acquisition_function=qNEI(),
)

# Serialize the description to its JSON spec and display it.
jspec = strategy_data.json()

jspec

'{"type": "SoboStrategy", "domain": {"type": "Domain", "inputs": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "unit": null, "bounds": [-6.0, 6.0], "stepsize": null}, {"type": "ContinuousInput", "key": "x_2", "unit": null, "bounds": [-6.0, 6.0], "stepsize": null}]}, "outputs": {"type": "Outputs", "features": [{"type": "ContinuousOutput", "key": "y", "unit": null, "objective": {"type": "MinimizeObjective", "w": 1.0, "bounds": [0, 1]}}]}, "constraints": {"type": "Constraints", "constraints": []}}, "seed": null, "num_sobol_samples": 512, "num_restarts": 8, "num_raw_samples": 1024, "descriptor_method": "EXHAUSTIVE", "categorical_method": "EXHAUSTIVE", "discrete_method": "EXHAUSTIVE", "surrogate_specs": {"surrogates": [{"hyperconfig": {"type": "SingleTaskGPHyperconfig", "hyperstrategy": "FactorialStrategy", "inputs": {"type": "Inputs", "features": [{"type": "CategoricalInput", "key": "kernel", "categories": ["rbf", "matern_1.5", "matern_2.5"], "allowed": [true, t

As SOBO is a predictive strategy, training data has to be provided before candidates can be requested.

In [6]:
# Rebuild the data model from the JSON spec.
spec_dict = json.loads(jspec)
strategy_data = parse_obj_as(AnyStrategy, spec_dict)

# Turn the data model into a strategy object.
strategy = stategies.map(strategy_data)

# Register the pending candidates, when there are any.
if pending_candidates is not None:
    strategy.add_candidates(pending_candidates)

# Provide the training data.
strategy.tell(experiments=experiments)

# Request two new candidates and convert them to the candidate spec.
df_candidates = strategy.ask(candidate_count=2)
candidates = strategy.to_candidates(df_candidates)

candidates

[Candidate(inputValues={'x_1': InputValue(value=-6.0), 'x_2': InputValue(value=2.33197798218441)}, outputValues={'y': OutputValue(predictedValue=73.13600152762984, standardDeviation=165.42514150105228, objective=-73.13600152762984)}),
 Candidate(inputValues={'x_1': InputValue(value=6.0), 'x_2': InputValue(value=-0.578825528795338)}, outputValues={'y': OutputValue(predictedValue=204.0602202675799, standardDeviation=212.6461680389249, objective=-204.0602202675799)})]

We can also save the trained models of the strategy; for more information, see the `model_serial.ipynb` notebook. The `dumps` command may fail here. If it does, the cause is an issue in the `linear_operator` package whose fix is already in, or should soon be available in, that package's main branch.

In [7]:
# Spec of the first surrogate, taken from the strategy data model, as JSON.
first_surrogate_spec = strategy_data.surrogate_specs.surrogates[0]
jsurrogate_spec = first_surrogate_spec.json()

# Dump of the first surrogate object held by the strategy itself.
first_surrogate = strategy.surrogates.surrogates[0]
dump = first_surrogate.dumps()

## MOBO Strategy

As an example of a multiobjective strategy, we use the Qnehvi strategy here. Related strategies would be Qparego, MultiplicativeSobo etc. To use it, we first have to generate a multiobjective domain.

In [8]:
# Multi-objective benchmark, constructed with dim=6. From here on the names
# `benchmark`, `samples`, `experiments` and `pending_candidates` refer to this
# problem instead of the single-objective one.
benchmark = DTLZ2(dim=6)
# Training data: 20 sampled input points evaluated with the benchmark function.
samples = benchmark.domain.inputs.sample(n=20)
experiments = benchmark.f(samples, return_complete=True)
# Pending candidates: a second, separate sample of size 2 from the inputs.
pending_candidates = benchmark.domain.inputs.sample(2)


Now the strategy spec is set up. Note that we can define there exactly which model to use.

In [9]:
# Explicit surrogate for the first output of the domain: a single-task GP
# over all inputs with a scaled RBF kernel (ard=False).
first_output_surrogate = SingleTaskGPSurrogate(
    inputs=benchmark.domain.inputs,
    outputs=Outputs(features=[benchmark.domain.outputs[0]]),
    kernel=ScaleKernel(base_kernel=RBFKernel(ard=False)),
)

# Strategy description that carries the surrogate defined above.
strategy_data = QnehviStrategyDataModel(
    domain=benchmark.domain,
    surrogate_specs=BotorchSurrogates(surrogates=[first_output_surrogate]),
)

# Serialize the description to its JSON spec and display it.
jspec = strategy_data.json()

jspec

NameError: name 'Outputs' is not defined

Generate the candidates.

In [None]:
# JSON spec -> dict -> validated strategy data model
spec_dict = json.loads(jspec)
strategy_data = parse_obj_as(AnyStrategy, spec_dict)

# data model -> strategy object
strategy = stategies.map(strategy_data)

# Pending candidates are added first, if there are any.
if pending_candidates is not None:
    strategy.add_candidates(pending_candidates)

# Training data goes in next.
strategy.tell(experiments=experiments)

# Request a single candidate and convert it to the candidate spec.
df_candidates = strategy.ask(candidate_count=1)
candidates = strategy.to_candidates(df_candidates)

candidates

[Candidate(inputValues={'x_0': InputValue(value=1.0), 'x_1': InputValue(value=0.0), 'x_2': InputValue(value=0.0), 'x_3': InputValue(value=1.0), 'x_4': InputValue(value=0.0), 'x_5': InputValue(value=1.0)}, outputValues={'f_0': OutputValue(predictedValue=0.05837048644034282, standardDeviation=0.18559375905314565, objective=-0.05837048644034282), 'f_1': OutputValue(predictedValue=1.097383607430488, standardDeviation=0.3443568727244193, objective=-1.097383607430488)})]

To fill the model info section accordingly, the following snippet has to be executed for every surrogate, including saving the actual models.

In [None]:
class TestMethod(BaseModel):
    """Backend record describing how a surrogate was tested."""

    type: str


class CrossValidation(TestMethod):
    """Test-method record for a cross validation with `foldCount` folds."""

    type: Literal["CrossValidation"] = "CrossValidation"
    foldCount: int


# One fold count for both the cross validation and the record describing it,
# so the two cannot drift apart.
FOLD_COUNT = 5

for i, surrogate_data in enumerate(strategy_data.surrogate_specs.surrogates):
    # The spec comes from the data model; the surrogate object itself comes
    # from `strategy.surrogates`, the same place the single-surrogate dump
    # earlier in this notebook reads it from. `strategy.surrogate_specs`
    # would not give the object that is dumped and cross-validated here.
    surrogate = strategy.surrogates.surrogates[i]
    # get the spec
    jsurrogate_spec = surrogate_data.json()
    # get the dump
    dump = surrogate.dumps()
    # do the cross validation, only if we have a trainable model under the hood
    if isinstance(surrogate, TrainableSurrogate):
        cv_train, cv_test, _ = surrogate.cross_validate(
            strategy.experiments, folds=FOLD_COUNT
        )
        # transform the bofire objects to the backend objects
        testMethod = CrossValidation(foldCount=FOLD_COUNT)
        cvResultsTrain = CvResults2CrossValidationValues(cv_train)
        cvResultsTest = CvResults2CrossValidationValues(cv_test)
        # Metrics are keyed by the surrogate's first output.
        # NOTE(review): train metrics use combine_folds=False while test
        # metrics use combine_folds=True -- confirm the asymmetry is intended.
        output_key = surrogate.outputs[0].key
        metricsTrain = {
            output_key: cv_train.get_metrics(combine_folds=False)
            .describe()
            .loc["mean"]
            .to_dict()
        }
        metricsTest = {
            output_key: cv_test.get_metrics(combine_folds=True)
            .describe()
            .loc["mean"]
            .to_dict()
        }
        # save to backend
        # - jsurrogate_spec
        # - dump
        # - testMethod
        # - cvResultsTrain
        # - cvResultsTest
        # - metricsTrain
        # - metricsTest