# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [1]:
from bofire.domain.features import (
    ContinuousInput,
    ContinuousOutput,
    InputFeatures,
    OutputFeatures,
)
from bofire.models.torch_models import SingleTaskGPModel
from bofire.utils.enum import RegressionMetricsEnum
from bofire.models.feature_importance import permutation_importance_hook, combine_permutation_importances
from bofire.plot.feature_importance import plot_feature_importance_by_feature_plotly

## Problem Setup

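For didactic purposes, we sample data from the Himmelblau benchmark function and use it to train a SingleTaskGP. Note that three inputs are sampled, but only `x_1` and `x_2` enter the function; `x_3` has no influence on `y`.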

In [2]:
# TODO: replace this manual setup once JD's PR is ready.
# Three continuous inputs x_1, x_2, x_3, each bounded to [-4, 4], and one output y.
input_features = InputFeatures(
    features=[
        ContinuousInput(key=f"x_{idx}", lower_bound=-4, upper_bound=4)
        for idx in range(1, 4)
    ]
)
output_features = OutputFeatures(features=[ContinuousOutput(key="y")])

# Draw 50 input points, compute y from x_1 and x_2 with the Himmelblau formula
# (x_3 is deliberately unused), and add a constant valid_y column of ones
# (presumably flags every y value as valid for BoFire -- TODO confirm).
experiments = (
    input_features.sample(n=50)
    .eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)")
    .assign(valid_y=1)
)

## Cross Validation
### Run the cross validation

In [3]:
# Build a SingleTaskGP model for the inputs and outputs defined above.
model = SingleTaskGPModel(input_features=input_features, output_features=output_features)

# Run a 5-fold cross validation with the permutation importance hook attached.
# The call returns three objects; the third one (`pi`) holds the hook results,
# addressed by the hook's key.
# NOTE: the key below is misspelled, but later cells look the results up under
# exactly this string, so it must stay in sync with them.
train_cv, test_cv, pi = model.cross_validate(
    experiments,
    folds=5,
    hooks={"pemutation_imprtance": permutation_importance_hook},
)

In [4]:
# Combine the hook results for the R2 metric and show per-feature summary
# statistics (count, mean, std, quantiles) via describe().
combine_permutation_importances(
    pi["pemutation_imprtance"],
    RegressionMetricsEnum.R2,
).describe()

Unnamed: 0,x_1,x_2,x_3
count,5.0,5.0,5.0
mean,1.354148,1.38408,-2.798344e-07
std,0.673104,0.384766,6.318995e-07
min,0.706702,0.923748,-1.383151e-06
25%,0.938413,1.148148,-1.975908e-07
50%,1.123698,1.345757,-8.879409e-09
75%,1.594509,1.588075,-5.882954e-10
max,2.407416,1.914673,1.910379e-07


In [5]:
# For every regression metric, combine the permutation importances collected
# during cross validation and keep their summary statistics, keyed by the
# metric's name.
combined_importances = {
    metric.name: combine_permutation_importances(pi["pemutation_imprtance"], metric).describe()
    for metric in RegressionMetricsEnum
}

# Plot the absolute (relative=False) importances per feature, including the
# standard deviation. The caption spelling is fixed ("Permutation").
plot_feature_importance_by_feature_plotly(
    combined_importances,
    relative=False,
    caption="Permutation Feature Importances",
    show_std=True,
    importance_measure="Permutation Feature Importance",
)

### Analyze the cross validation

Plots will be added in a future PR.

In [5]:
# Performance on the test sets of the cross validation.
# With combine_folds=True the metrics come back as a single row (see the output
# below) instead of one row per fold.
test_cv.get_metrics(combine_folds=True)

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,8.097711,402.398279,0.896889,0.746935,0.949201,0.953517,4.952116e-12


In [6]:
# Test-set metrics per fold (one row per fold), followed by their summary
# statistics. The metrics are computed once and reused for both tables rather
# than calling get_metrics twice.
per_fold_metrics = test_cv.get_metrics(combine_folds=False)
display(per_fold_metrics, per_fold_metrics.describe())

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,1.069787,2.482588,0.999416,0.017568,0.999754,1.0,0.003968
1,19.881369,1489.292683,0.546031,0.463635,0.783047,0.757576,0.103175
2,4.575483,51.177447,0.986966,0.159042,0.993895,0.975758,0.003968
3,5.183901,97.612242,0.975734,0.294463,0.992707,0.975758,0.003968
4,9.778017,371.426436,0.872619,2.799968,0.935338,0.915152,0.103175


Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,8.097711,402.398279,0.876153,0.746935,0.940948,0.924848,0.043651
std,7.279528,624.195855,0.1913,1.159475,0.092066,0.098622,0.054338
min,1.069787,2.482588,0.546031,0.017568,0.783047,0.757576,0.003968
25%,4.575483,51.177447,0.872619,0.159042,0.935338,0.915152,0.003968
50%,5.183901,97.612242,0.975734,0.294463,0.992707,0.975758,0.003968
75%,9.778017,371.426436,0.986966,0.463635,0.993895,0.975758,0.103175
max,19.881369,1489.292683,0.999416,2.799968,0.999754,1.0,0.103175
