# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [9]:
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.surrogates.api import SingleTaskGPSurrogate
import bofire.surrogates.api as surrogates
from bofire.data_models.enum import RegressionMetricsEnum
from bofire.surrogates.feature_importance import permutation_importance_hook, combine_permutation_importances, lengthscale_importance_hook, combine_lengthscale_importances
from bofire.plot.feature_importance import plot_feature_importance_by_feature_plotly

## Problem Setup

For didactic purposes, we sample data from a Himmelblau benchmark function and use them to train a SingleTaskGP.

In [10]:
# TODO: replace this manual setup once JD's PR is ready.
# Three continuous inputs, each bounded to [-4, 4]; note that only x_1 and x_2
# appear in the response expression below.
input_features = Inputs(
    features=[ContinuousInput(key=f"x_{i+1}", bounds=(-4, 4)) for i in range(3)]
)
output_features = Outputs(features=[ContinuousOutput(key="y")])

# Draw 50 input samples and add the Himmelblau response as column "y".
experiments = input_features.sample(n=50).eval(
    "y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)"
)
# Constant indicator column; presumably marks each "y" value as valid for
# BoFire (TODO confirm against the BoFire experiment format).
experiments["valid_y"] = 1

## Cross Validation
### Run the cross validation

In [11]:
# Data model for a single-task GP surrogate over the features defined above,
# mapped to the surrogate object that is actually cross-validated.
data_model = SingleTaskGPSurrogate(inputs=input_features, outputs=output_features)
model = surrogates.map(data_model=data_model)

# Hooks handed to the cross validation; the third return value (`pi`) is later
# indexed by these same keys.
cv_hooks = {
    "permutation_importance": permutation_importance_hook,
    "lengthscale_importance": lengthscale_importance_hook,
}

# 5-fold cross validation on the sampled experiments.
train_cv, test_cv, pi = model.cross_validate(experiments, folds=5, hooks=cv_hooks)


In [12]:
# Combine the permutation importances collected by the hook, one entry per
# regression metric, and summarise each with `describe()`.
combined_importances = {
    m.name: combine_permutation_importances(pi["permutation_importance"], m).describe()
    for m in RegressionMetricsEnum
}
# Add the lengthscale-based importances under their own key so they are
# plotted next to the permutation-based ones.
combined_importances["lengthscale"] = combine_lengthscale_importances(
    pi["lengthscale_importance"]
).describe()

# Caption typo fixed ("Permuation" -> "Permutation").
plot_feature_importance_by_feature_plotly(
    combined_importances,
    relative=False,
    caption="Permutation Feature Importances",
    show_std=True,
    importance_measure="Permutation Feature Importance",
)

### Analyze the cross validation

Plots will be added in a future PR.

In [13]:
# Test-set performance with all folds combined into a single row of metrics.
combined_test_metrics = test_cv.get_metrics(combine_folds=True)
combined_test_metrics

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,10.217482,234.434666,0.926578,0.24492,0.964394,0.94449,7.169177e-10


In [14]:
# Compute the per-fold test metrics once and reuse them for both views,
# instead of calling get_metrics twice with identical arguments.
fold_metrics = test_cv.get_metrics(combine_folds=False)

# One row per fold, followed by summary statistics across the folds.
display(fold_metrics)
display(fold_metrics.describe())

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,9.632927,213.727514,0.825231,0.145011,0.949743,0.915152,0.003968
1,6.196258,72.999206,0.973324,0.168016,0.989875,0.963636,0.003968
2,10.828154,385.886677,0.812619,0.232417,0.920254,0.648485,0.103175
3,10.303255,197.282961,0.948934,0.284687,0.977865,0.939394,0.103175
4,14.126814,302.276973,0.934221,0.394467,0.989772,0.987879,0.003968


Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,10.217482,234.434666,0.898866,0.24492,0.965502,0.890909,0.043651
std,2.836155,117.697423,0.074434,0.099997,0.030132,0.138203,0.054338
min,6.196258,72.999206,0.812619,0.145011,0.920254,0.648485,0.003968
25%,9.632927,197.282961,0.825231,0.168016,0.949743,0.915152,0.003968
50%,10.303255,213.727514,0.934221,0.232417,0.977865,0.939394,0.003968
75%,10.828154,302.276973,0.948934,0.284687,0.989772,0.963636,0.103175
max,14.126814,385.886677,0.973324,0.394467,0.989875,0.987879,0.103175
