# Model Building with BoFire

This notebook shows how to set up and analyze models trained with BoFire. It is still a work in progress (WIP).

## Imports

In [1]:
from bofire.domain.features import (
    ContinuousInput,
    ContinuousOutput,
    InputFeatures,
    OutputFeatures,
)
from bofire.models.gps import SingleTaskGPModel
from bofire.utils.enum import RegressionMetricsEnum
from bofire.models.feature_importance import permutation_importance_hook, combine_permutation_importances
from bofire.plot.feature_importance import plot_feature_importance_by_feature_plotly

## Problem Setup

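For didactic purposes, we sample data from a Himmelblau benchmark function and use them to train a SingleTaskGP. Note that the function depends only on `x_1` and `x_2`; the third input `x_3` has no influence on `y`.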

In [2]:
# TODO: replace this manual setup after JD's PR is ready.

# Three continuous inputs x_1, x_2, x_3, each bounded to [-4, 4].
input_features = InputFeatures(
    features=[
        ContinuousInput(key=f"x_{i+1}", lower_bound=-4, upper_bound=4)
        for i in range(3)
    ]
)
# A single continuous output named "y".
output_features = OutputFeatures(features=[ContinuousOutput(key="y")])

# Draw 50 input samples, compute the Himmelblau target (which uses only x_1
# and x_2) and add a constant "valid_y" column.
# NOTE(review): "valid_y" presumably flags each y value as valid for BoFire — confirm.
experiments = (
    input_features.sample(n=50)
    .eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)")
    .assign(valid_y=1)
)

## Cross Validation
### Run the cross validation

In [3]:
# Single-task GP model over the input and output features defined in the problem setup.
model = SingleTaskGPModel(input_features=input_features, output_features=output_features)

# 5-fold cross validation with the permutation-importance hook attached.
# The hook results come back in `pi` under the key given here. The key is
# misspelled, but later cells index `pi` with exactly this spelling, so rename
# all occurrences together if it is ever corrected.
train_cv, test_cv, pi = model.cross_validate(
    experiments,
    folds=5,
    hooks={"pemutation_imprtance": permutation_importance_hook},
)

In [5]:
# Combine the permutation importances collected during cross validation for
# the R2 metric, then show summary statistics per input feature.
r2_importances = combine_permutation_importances(
    pi["pemutation_imprtance"],
    RegressionMetricsEnum.R2,
)
r2_importances.describe()

Unnamed: 0,x_1,x_2,x_3
count,5.0,5.0,5.0
mean,1.270835,0.774548,0.013411
std,0.367562,0.224985,0.023255
min,0.664452,0.380786,-0.014057
25%,1.224416,0.836436,0.003742
50%,1.390944,0.850764,0.005209
75%,1.452784,0.851853,0.025589
max,1.621579,0.952898,0.046572


In [6]:
# For every regression metric, combine the permutation importances collected
# during cross validation and keep their summary statistics, keyed by the
# metric's name.
combined_importances = {
    m.name: combine_permutation_importances(pi["pemutation_imprtance"], m).describe()
    for m in RegressionMetricsEnum
}

# Plot absolute (relative=False) importances per feature, including the
# standard deviation. The caption spelling is corrected ("Permutation").
plot_feature_importance_by_feature_plotly(
    combined_importances,
    relative=False,
    caption="Permutation Feature Importances",
    show_std=True,
    importance_measure="Permutation Feature Importance",
)

### Analyze the cross validation

Plots will be added in a future PR.

In [7]:
# Test-set performance with all folds combined into a single row of metrics.
combined_test_metrics = test_cv.get_metrics(combine_folds=True)
combined_test_metrics

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,12.20462,346.819318,0.933413,0.303224,0.96649,0.941897,7.169177e-10


In [8]:
# Per-fold test-set metrics: compute them once, then show the raw per-fold
# table followed by its summary statistics.
fold_metrics = test_cv.get_metrics(combine_folds=False)
display(fold_metrics)
display(fold_metrics.describe())

Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
0,5.060924,68.171436,0.9766,0.083701,0.989339,0.975758,0.003968
1,19.146679,587.6204,0.89436,0.904898,0.952141,0.430303,0.5
2,17.21557,640.296279,0.886919,0.125528,0.970113,0.987879,0.003968
3,5.786071,61.09102,0.980054,0.074658,0.992872,0.975758,0.003968
4,13.813856,376.917453,0.923035,0.327338,0.965278,0.878788,0.003968


Unnamed: 0,MAE,MSD,R2,MAPE,PEARSON,SPEARMAN,FISHER
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,12.20462,346.819318,0.932194,0.303224,0.973949,0.849697,0.103175
std,6.483086,275.819274,0.044237,0.35164,0.017033,0.238545,0.221832
min,5.060924,61.09102,0.886919,0.074658,0.952141,0.430303,0.003968
25%,5.786071,68.171436,0.89436,0.083701,0.965278,0.878788,0.003968
50%,13.813856,376.917453,0.923035,0.125528,0.970113,0.975758,0.003968
75%,17.21557,587.6204,0.9766,0.327338,0.989339,0.975758,0.003968
max,19.146679,640.296279,0.980054,0.904898,0.992872,0.987879,0.5
