# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [1]:
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.surrogates.api import SingleTaskGPSurrogate
import bofire.surrogates.api as surrogates
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.surrogates.feature_importance import permutation_importance_hook, combine_permutation_importances
from bofire.plot.feature_importance import plot_feature_importance_by_feature_plotly

  from .autonotebook import tqdm as notebook_tqdm


## Problem Setup

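For didactic purposes, we sample data from the Himmelblau benchmark function and use it to train a SingleTaskGP. Note that the function depends only on `x_1` and `x_2`; the third input `x_3` does not enter the output.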

In [2]:
# TODO: replace this manual setup after JD's PR is ready.

# Three continuous inputs (x_1, x_2, x_3), each bounded to [-4, 4].
input_keys = [f"x_{i+1}" for i in range(3)]
input_features = Inputs(
    features=[ContinuousInput(key=key, bounds=(-4, 4)) for key in input_keys]
)

# A single continuous output named "y".
output_features = Outputs(features=[ContinuousOutput(key="y")])

# Draw 50 samples from the input space and add the response column "y".
# The expression only uses x_1 and x_2; x_3 does not contribute to y.
experiments = input_features.sample(n=50).eval(
    "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)"
)

# Flag column set to 1 for every row
# (presumably marks each "y" value as valid for BoFire -- confirm).
experiments["valid_y"] = 1

## Cross Validation
### Run the cross validation

In [3]:
# Data model for a single-task GP surrogate over the inputs/outputs defined above.
data_model = SingleTaskGPSurrogate(
    input_features=input_features, output_features=output_features
)

# Turn the data model into a surrogate object via the surrogates API.
model = surrogates.map(data_model=data_model)

# Hooks passed to the cross validation; the results in `pi` are looked up by the
# same key later on.
# NOTE(review): the key is misspelled ("pemutation_imprtance"); it is kept as is
# because other cells index `pi` with this exact string.
cv_hooks = {"pemutation_imprtance": permutation_importance_hook}

# 5-fold cross validation on the sampled experiments.
train_cv, test_cv, pi = model.cross_validate(experiments, folds=5, hooks=cv_hooks)

In [4]:
# Combine the hook results collected during cross validation for the R2 metric,
# then show summary statistics per input feature.
r2_importances = combine_permutation_importances(
    pi["pemutation_imprtance"], RegressionMetricsEnum.R2
)
r2_importances.describe()

Unnamed: 0,x_1,x_2,x_3
count,5.0,5.0,5.0
mean,1.227729,0.974822,-0.003041
std,0.672889,0.329523,0.005931
min,0.335321,0.654728,-0.010206
25%,0.916784,0.690398,-0.00492
50%,1.228767,0.975652,-0.003622
75%,1.516431,1.091241,-0.002655
max,2.141343,1.46209,0.006198


In [5]:
# Summary statistics of the combined permutation importances, one table per
# regression metric, keyed by the metric's name.
combined_importances = {
    m.name: combine_permutation_importances(pi["pemutation_imprtance"], m).describe()
    for m in RegressionMetricsEnum
}

# Plot the importances per feature (absolute values, with standard deviations).
# The caption previously read "Permuation Feature Importances" (typo).
plot_feature_importance_by_feature_plotly(
    combined_importances,
    relative=False,
    caption="Permutation Feature Importances",
    show_std=True,
    importance_measure="Permutation Feature Importance",
)

### Analyze the cross validation

Plots will be added in a future PR.

In [6]:
# Test-set performance, with metrics computed using combine_folds=True.
combined_test_metrics = test_cv.get_metrics(combine_folds=True)
combined_test_metrics

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,9.884083,233.73427,0.93558,0.198069,0.967525,0.976663,4.952116e-12


In [7]:
# Per-fold test metrics (combine_folds=False). Computed once and reused, instead
# of calling get_metrics twice for the table and its summary.
fold_metrics = test_cv.get_metrics(combine_folds=False)

# Show the per-fold table, then its summary statistics across folds.
display(fold_metrics)
display(fold_metrics.describe())

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,6.877202,92.259156,0.961175,0.275673,0.98244,0.987879,0.003968
1,10.926883,186.212009,0.937423,0.26103,0.969604,0.963636,0.003968
2,8.687306,130.363378,0.964581,0.136272,0.98828,0.975758,0.003968
3,7.080616,84.001028,0.967287,0.149387,0.983766,0.987879,0.003968
4,15.84841,675.835776,0.883729,0.167982,0.974902,1.0,0.003968


Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,9.884083,233.73427,0.942839,0.198069,0.979799,0.98303,0.003968
std,3.707886,250.416104,0.035107,0.065346,0.007459,0.01382,0.0
min,6.877202,84.001028,0.883729,0.136272,0.969604,0.963636,0.003968
25%,7.080616,92.259156,0.937423,0.149387,0.974902,0.975758,0.003968
50%,8.687306,130.363378,0.961175,0.167982,0.98244,0.987879,0.003968
75%,10.926883,186.212009,0.964581,0.26103,0.983766,0.987879,0.003968
max,15.84841,675.835776,0.967287,0.275673,0.98828,1.0,0.003968
