# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [1]:
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.surrogates.api import SingleTaskGPSurrogate
import bofire.surrogates.api as surrogates
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.surrogates.feature_importance import permutation_importance_hook, combine_permutation_importances
from bofire.plot.feature_importance import plot_feature_importance_by_feature_plotly

  from .autonotebook import tqdm as notebook_tqdm


## Problem Setup

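For didactic purposes, we sample data from the Himmelblau benchmark function and use it to train a SingleTaskGP. The domain has three inputs, but only `x_1` and `x_2` enter the function; `x_3` is irrelevant by construction, which is useful for checking the feature importances computed below.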

In [2]:
# TODO: replace this manual setup once JD's PR is ready.
# Three continuous inputs named x_1, x_2, x_3, each bounded to (-4, 4).
input_features = Inputs(
    features=[
        ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4))
        for i in range(3)
    ],
)
# One continuous output named "y".
output_features = Outputs(
    features=[ContinuousOutput(key="y")],
)

# Sample 50 points from the input domain, add the Himmelblau response as
# column "y" (only x_1 and x_2 appear in the expression; x_3 has no effect),
# and set the "valid_y" column to 1 for every row
# (presumably BoFire's validity flag for output "y" -- TODO confirm).
experiments = (
    input_features.sample(n=50)
    .eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)")
    .assign(valid_y=1)
)

## Cross Validation
### Run the cross validation

In [3]:
# Data model describing a single-task GP surrogate over the inputs/outputs
# defined in the problem setup.
data_model = SingleTaskGPSurrogate(inputs=input_features, outputs=output_features)

# Turn the data model into a surrogate object that can be cross-validated.
model = surrogates.map(data_model=data_model)

# 5-fold cross validation. The third return value holds the hook results and
# is indexed by the hook name given in `hooks`; later cells read it with this
# exact (misspelled) key, so the spelling must stay in sync.
train_cv, test_cv, pi = model.cross_validate(
    experiments,
    folds=5,
    hooks={"pemutation_imprtance": permutation_importance_hook},
)

In [4]:
# Combine the permutation-importance hook results for the R2 metric and show
# summary statistics per input feature.
combine_permutation_importances(
    pi["pemutation_imprtance"],
    RegressionMetricsEnum.R2,
).describe()

Unnamed: 0,x_1,x_2,x_3
count,5.0,5.0,5.0
mean,1.195658,1.005208,0.003171
std,0.278169,0.432378,0.014748
min,0.74776,0.562584,-0.02255
25%,1.168564,0.696822,0.006167
50%,1.2754,0.843397,0.007754
75%,1.284434,1.378019,0.009598
max,1.502131,1.545219,0.014883


In [5]:
# One summary table per regression metric: for every member of
# RegressionMetricsEnum, combine the hook results and summarise them with
# describe(). The dict is keyed by the metric's enum name.
# NOTE: the hook key must match the (misspelled) key that was passed to
# `hooks=` in the cross-validation cell.
combined_importances = {
    m.name: combine_permutation_importances(pi["pemutation_imprtance"], m).describe()
    for m in RegressionMetricsEnum
}

# Plot the importances per feature, with standard deviations shown.
# The caption previously read "Permuation"; the typo is fixed here.
plot_feature_importance_by_feature_plotly(
    combined_importances,
    relative=False,
    caption="Permutation Feature Importances",
    show_std=True,
    importance_measure="Permutation Feature Importance",
)

### Analyze the cross validation

Plots will be added in a future PR.

In [6]:
# Test-set performance with all folds combined into one set of metrics.
test_cv.get_metrics(
    combine_folds=True,
)

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,10.360605,199.467519,0.952375,0.283386,0.97644,0.97503,7.169177e-10


In [7]:
# Compute the per-fold test metrics once and reuse the result for both views,
# instead of calling get_metrics twice with identical arguments.
fold_metrics = test_cv.get_metrics(combine_folds=False)

# Per-fold metrics first, then their summary statistics across folds.
display(fold_metrics)
display(fold_metrics.describe())

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,11.574244,163.418726,0.875903,0.13997,0.962757,0.939394,0.003968
1,11.350321,401.145887,0.925813,0.129175,0.965243,0.951515,0.003968
2,10.477276,148.008528,0.955473,0.319148,0.977972,0.939394,0.103175
3,8.435506,139.297341,0.956663,0.152325,0.980538,0.975758,0.103175
4,9.96568,145.467113,0.972863,0.676311,0.988691,0.987879,0.003968


Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,10.360605,199.467519,0.937343,0.283386,0.97504,0.958788,0.043651
std,1.25778,113.090895,0.038313,0.233021,0.010863,0.022019,0.054338
min,8.435506,139.297341,0.875903,0.129175,0.962757,0.939394,0.003968
25%,9.96568,145.467113,0.925813,0.13997,0.965243,0.939394,0.003968
50%,10.477276,148.008528,0.955473,0.152325,0.977972,0.951515,0.003968
75%,11.350321,163.418726,0.956663,0.319148,0.980538,0.975758,0.103175
max,11.574244,401.145887,0.972863,0.676311,0.988691,0.987879,0.103175
