# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [27]:
from bofire.data_models.features.api import (
    ContinuousInput,
    ContinuousOutput,
)
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.surrogates.api import SingleTaskGPSurrogate, RandomForestSurrogate, MixedSingleTaskGPSurrogate, AnySurrogate
from bofire.benchmarks.single import Himmelblau
import bofire.surrogates.api as surrogates
import json
from pydantic import parse_obj_as

## Problem Setup

For didactic purposes, we sample data from a Himmelblau benchmark function and use them to train a SingleTaskGP.

In [2]:
# Instantiate the Himmelblau benchmark function that serves as the data source.
benchmark = Himmelblau()
# Draw 50 input points from the benchmark's input space.
# NOTE(review): no seed is passed, so the sampled rows presumably differ between runs — confirm.
samples = benchmark.domain.inputs.sample(n=50)
# Evaluate the benchmark at the sampled points; with return_complete=True the
# displayed frame holds the inputs (x_1, x_2) next to y and valid_y.
experiments = benchmark.f(samples, return_complete=True)

# Show the first ten rows as the cell output.
experiments.head(10)

Unnamed: 0,x_1,x_2,y,valid_y
0,-3.417554,1.611629,66.405726,1
1,-2.914634,-0.074028,104.842241,1
2,0.485777,3.769593,108.144406,1
3,1.591443,-0.998751,109.06369,1
4,-3.663129,-1.353431,79.127229,1
5,2.75147,2.151108,1.77751,1
6,-3.243289,-1.865197,51.26114,1
7,1.526138,1.303139,68.539797,1
8,-2.564676,1.711192,51.393977,1
9,-1.220227,3.54293,54.385706,1


## Model Fitting


In [3]:
# Take the input and output feature collections from the benchmark's domain;
# they are passed to the surrogate data model below.
input_features = benchmark.domain.input_features
output_features = benchmark.domain.output_features

In [4]:
# Serialize the input features to a JSON string and display it for inspection.
input_features.json()

'{"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "lower_bound": -4.0, "upper_bound": 4.0}, {"type": "ContinuousInput", "key": "x_2", "lower_bound": -4.0, "upper_bound": 4.0}]}'

In [7]:
# Serialize the output features (including their objective) to a JSON string and display it.
output_features.json()

'{"type": "Outputs", "features": [{"type": "ContinuousOutput", "key": "y", "objective": {"type": "MaximizeObjective", "w": 1.0, "lower_bound": 0, "upper_bound": 1}}]}'

### Single Task GP

Generate the JSON spec.

In [28]:
# Set up the data model, here a Single Task GP, from the benchmark's input and output features.
# Only the features are passed explicitly; the kernel, priors and scaler that
# appear in the JSON output are whatever the data model fills in by itself.
surrogate_data = SingleTaskGPSurrogate(
    input_features=input_features,
    output_features=output_features
)

# Generate the JSON spec (a string) from the data model.
jspec = surrogate_data.json()

# Display the spec as the cell output.
jspec

'{"type": "SingleTaskGPSurrogate", "input_features": {"type": "Inputs", "features": [{"type": "ContinuousInput", "key": "x_1", "lower_bound": -4.0, "upper_bound": 4.0}, {"type": "ContinuousInput", "key": "x_2", "lower_bound": -4.0, "upper_bound": 4.0}]}, "output_features": {"type": "Outputs", "features": [{"type": "ContinuousOutput", "key": "y", "objective": {"type": "MaximizeObjective", "w": 1.0, "lower_bound": 0, "upper_bound": 1}}]}, "input_preprocessing_specs": {}, "kernel": {"type": "ScaleKernel", "base_kernel": {"type": "MaternKernel", "ard": true, "nu": 2.5, "lengthscale_prior": {"type": "GammaPrior", "concentration": 3.0, "rate": 6.0}}, "outputscale_prior": {"type": "GammaPrior", "concentration": 2.0, "rate": 0.15}}, "scaler": "NORMALIZE"}'

Load it from the spec

In [31]:
# Decode the JSON spec to a dict and let pydantic build the data model from it.
# NOTE(review): AnySurrogate is presumably a union of all surrogate data models,
# so the "type" entry in the spec decides which class is created — confirm.
surrogate_data = parse_obj_as(AnySurrogate, json.loads(jspec))

Map it 

In [32]:
# Map the data model to the surrogate object that is fitted and used for predictions below.
surrogate = surrogates.map(surrogate_data)

Fit it. Fitting is not 100% finished yet: in the future we will call `hyperfit` here, which will return the cross-validation (CV) results etc. Until that is implemented, ignore this and just call `fit`.

In [8]:
# Train the surrogate on the experiments sampled from the benchmark.
surrogate.fit(experiments=experiments)

Dump it.

In [9]:
# Dump the fitted surrogate; the result is handed to `surrogate.loads` further
# down to load it into a freshly mapped surrogate.
dump = surrogate.dumps()

Make predictions.

In [10]:
# Predict with the fitted surrogate on the same experiments it was trained on.
df_predictions = surrogate.predict(experiments)
# Transform the predictions to the spec format; the result is kept for the
# equality check against the reloaded surrogate.
predictions = surrogate.to_predictions(predictions=df_predictions)

Load the surrogate again from the spec and the dump, then make predictions.

In [34]:
# Rebuild the data model from the JSON spec and map it to a fresh surrogate object.
surrogate_data = parse_obj_as(AnySurrogate, json.loads(jspec))
surrogate = surrogates.map(surrogate_data)
# Load the earlier dump into it instead of calling fit again
# (presumably this restores the fitted model — confirm).
surrogate.loads(dump)

# Predict with the reloaded surrogate on the same experiments as before.
df_predictions2 = surrogate.predict(experiments)
# Transform the predictions to the spec format.
predictions2 = surrogate.to_predictions(predictions=df_predictions2)

# Check for equality with the predictions of the originally fitted surrogate.
# NOTE(review): this only displays the comparison result; a mismatch does not
# stop the notebook.
predictions==predictions2
