# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [2]:
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.surrogates.api import SingleTaskGPSurrogate
import bofire.surrogates.api as surrogates
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.surrogates.feature_importance import permutation_importance_hook, combine_permutation_importances
from bofire.plot.feature_importance import plot_feature_importance_by_feature_plotly

  from .autonotebook import tqdm as notebook_tqdm


## Problem Setup

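For didactic purposes, we sample data from the Himmelblau benchmark function and use it to train a SingleTaskGP. The domain has three inputs, but only `x_1` and `x_2` enter the function, so `x_3` acts as an irrelevant feature.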

In [3]:
# Todo: replace this after JDs PR is ready.
# Domain: three continuous inputs x_1, x_2, x_3 (each bounded to [-4, 4]) and a
# single continuous output y.
feature_list = [ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3)]
input_features = Inputs(features=feature_list)
output_features = Outputs(features=[ContinuousOutput(key="y")])

# Draw 50 input samples, compute y from x_1 and x_2 (x_3 does not appear in the
# expression), and add a valid_y column set to 1 for every row.
# NOTE(review): valid_y presumably flags the y measurements as valid for BoFire;
# confirm against the BoFire experiment conventions.
experiments = (
    input_features.sample(n=50)
    .eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)")
    .assign(valid_y=1)
)

## Cross Validation
### Run the cross validation

In [6]:
# Describe the surrogate with the inputs/outputs defined above and map the
# data model to a surrogate object.
data_model = SingleTaskGPSurrogate(inputs=input_features, outputs=output_features)
model = surrogates.map(data_model=data_model)

# The hook key is looked up again as pi[...] in later cells, so the string must
# stay exactly as written here.
cv_hooks = {"pemutation_imprtance": permutation_importance_hook}

# 5-fold cross validation; the third return value is later indexed by the hook key.
train_cv, test_cv, pi = model.cross_validate(
    experiments,
    folds=5,
    hooks=cv_hooks,
)

In [7]:
# Combine the permutation-importance hook results for the R2 metric and show
# their summary statistics.
r2_importances = combine_permutation_importances(
    pi["pemutation_imprtance"],
    RegressionMetricsEnum.R2,
)
r2_importances.describe()

Unnamed: 0,x_1,x_2,x_3
count,5.0,5.0,5.0
mean,1.376067,0.959183,-1.6e-05
std,0.293829,0.363178,0.016669
min,1.006052,0.393726,-0.01337
25%,1.169221,0.836989,-0.011787
50%,1.407687,1.036672,-0.010514
75%,1.559241,1.244672,0.0128
max,1.738135,1.283855,0.02279


In [8]:
# One summary table of permutation importances per regression metric, keyed by
# the metric's name.
combined_importances = {
    m.name: combine_permutation_importances(pi["pemutation_imprtance"], m).describe()
    for m in RegressionMetricsEnum
}

# Plot the summarised importances per feature; show_std=True asks the plot to
# include the standard deviation, relative=False keeps the values unnormalised.
# NOTE(review): the exact rendering of these options is defined by BoFire's plot helper.
plot_feature_importance_by_feature_plotly(
    combined_importances,
    relative=False,
    caption="Permutation Feature Importances",
    show_std=True,
    importance_measure="Permutation Feature Importance",
)

### Analyze the cross validation

Plots will be added in a future PR.

In [9]:
# Test-set performance with all folds combined (combine_folds=True).
pooled_test_metrics = test_cv.get_metrics(combine_folds=True)
pooled_test_metrics

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,13.60488,637.579312,0.834199,0.345342,0.91346,0.904442,7.169177e-10


In [10]:
# Per-fold test metrics: compute the table once, then show it together with
# its summary statistics across folds.
fold_metrics = test_cv.get_metrics(combine_folds=False)
display(fold_metrics)
display(fold_metrics.describe())

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,24.542953,1742.281114,0.370354,1.038079,0.614752,0.6,0.103175
1,10.890329,432.252654,0.915275,0.228691,0.965604,0.963636,0.003968
2,8.987626,276.360567,0.925229,0.207901,0.96405,1.0,0.003968
3,14.604859,398.36157,0.92339,0.142921,0.963131,0.915152,0.003968
4,8.998632,338.640657,0.814963,0.109117,0.911123,0.915152,0.003968


Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,13.60488,637.579312,0.789842,0.345342,0.883732,0.878788,0.02381
std,6.529677,620.401793,0.239008,0.390243,0.152117,0.15989,0.044366
min,8.987626,276.360567,0.370354,0.109117,0.614752,0.6,0.003968
25%,8.998632,338.640657,0.814963,0.142921,0.911123,0.915152,0.003968
50%,10.890329,398.36157,0.915275,0.207901,0.963131,0.915152,0.003968
75%,14.604859,432.252654,0.92339,0.228691,0.96405,0.963636,0.003968
max,24.542953,1742.281114,0.925229,1.038079,0.965604,1.0,0.103175
