In [None]:
%matplotlib inline

# Expected improvement based on cross-validation.


In [None]:
from __future__ import annotations

import matplotlib.pyplot as plt
from gemseo.algos.design_space import DesignSpace
from gemseo.datasets.io_dataset import IODataset
from gemseo.mlearning.regression.rbf import RBFRegressor
from numpy import array
from numpy import cos
from numpy import linspace

from gemseo_mlearning.active_learning.acquisition_criteria.expected_improvement import (
    ExpectedImprovement,
)
from gemseo_mlearning.active_learning.acquisition_criteria.mean_sigma import MeanSigma
from gemseo_mlearning.active_learning.active_learning_algo import ActiveLearningAlgo
from gemseo_mlearning.active_learning.distributions.regressor_distribution import (
    RegressorDistribution,
)

# Number of evenly spaced points of the grid on which the models are evaluated.
n_test = 200
# Lower and upper bounds of the input variable x (design space and grid).
x_l = -3.0
x_u = 3.0

Initial learning dataset
------------------------



In [None]:
def f(x):
    """Evaluate the reference function to be approximated.

    The function is ``(10 * cos(2 * x) + 15 - 5 * x + x**2) / 50``.

    Args:
        x: The input value; a scalar or a NumPy array (evaluated elementwise).

    Returns:
        The value of the function at ``x``, with the same shape as ``x``.
    """
    numerator = 10 * cos(2 * x) + 15 - 5 * x + x**2
    return numerator / 50


# Five training inputs and the corresponding values of f.
x_train = array([-2.4, -1.2, 0.0, 1.2, 2.4])
y_train = f(x_train)

# Learning dataset with "x" as input variable and "y" as output variable.
dataset = IODataset()
dataset.add_input_variable("x", x_train)
dataset.add_output_variable("y", y_train)

Initial surrogate model
-----------------------



In [None]:
# Build the initial surrogate model (an RBF regressor) from the learning
# dataset and train it.
algo = RBFRegressor(dataset)
algo.learn()

Create the regressor distribution
---------------------------------



In [None]:
# Wrap the surrogate in a distribution of regressors and train it; its
# sub-models (``distribution.algos``) are drawn as gray curves in the final plot.
# NOTE(review): bootstrap=False with loo=True presumably selects a
# leave-one-out cross-validation resampling, in line with the notebook title --
# confirm against the gemseo-mlearning documentation.
distribution = RegressorDistribution(algo, bootstrap=False, loo=True)
distribution.learn()

Acquisition criteria
--------------------



In [None]:
# Expected-improvement criterion built on the distribution; it is evaluated
# point by point on the test grid further down.
ego = ExpectedImprovement(distribution)
# Mean-sigma criteria with factors -2 and +2; their values are used further
# down as the lower and upper bounds of the band labelled "CI(95%)".
# NOTE(review): presumably mean + factor * standard deviation -- confirm.
lower = MeanSigma(distribution, -2.0)
upper = MeanSigma(distribution, 2.0)

Find next training point
------------------------



In [None]:
# One-dimensional design space for x, bounded by [x_l, x_u], with 1.5 as
# current value.
space = DesignSpace()
space.add_variable("x", l_b=x_l, u_b=x_u, value=1.5)

# Ask the active learning algorithm for the next input point, using the
# "ExpectedImprovement" criterion and the "fullfact" acquisition algorithm.
acquisition = ActiveLearningAlgo("ExpectedImprovement", space, distribution)
acquisition.set_acquisition_algorithm("fullfact")
# opt[0] is drawn as a vertical line in both panels of the final plot.
opt = acquisition.compute_next_input_data()

Evaluation of discipline, surrogate model and expected improvement
------------------------------------------------------------------



In [None]:
# Evaluation grid over [x_l, x_u] and the values of the original function on it.
x_test = linspace(x_l, x_u, n_test)
y_test = f(x_test)

# One tuple per grid point, evaluated in this order: surrogate prediction,
# expected improvement, lower bound, upper bound. The two bounds are the
# mean-sigma criteria multiplied by their ``output_range``
# (NOTE(review): presumably to undo a scaling of the criterion -- confirm).
evaluations = [
    (
        algo.predict(array([x_value]))[0],
        ego(array([x_value]))[0],
        lower(array([x_value]))[0] * lower.output_range,
        upper(array([x_value]))[0] * upper.output_range,
    )
    for x_value in x_test
]

# Split the tuples into one series per quantity; the surrogate predictions
# stay a plain list while the other series become NumPy arrays.
surr_data = [row[0] for row in evaluations]
ego_data = array([row[1] for row in evaluations])
lower_data = array([row[2] for row in evaluations])
upper_data = array([row[3] for row in evaluations])

# Dataset holding the original function sampled on the grid.
disc_data = IODataset()
disc_data.add_input_variable("x", x_test)
disc_data.add_output_variable("y", y_test)

Plotting
--------



In [None]:
fig, ax = plt.subplots(2, 1)

# Top panel: sub-models of the distribution (gray), training points, original
# function, surrogate prediction and the band between the two mean-sigma bounds.
for algo_b in distribution.algos:
    algo_data = [algo_b.predict(array([x_i])) for x_i in x_test]
    ax[0].plot(x_test, algo_data, "gray", alpha=0.2)
ax[0].plot(
    x_train, dataset.get_view(variable_names="y").to_numpy(), "ro", label="training"
)
ax[0].plot(
    x_test, disc_data.get_view(variable_names="y").to_numpy(), "r", label="original"
)
ax[0].plot(x_test, surr_data, "b", label="surrogate")
ax[0].fill_between(
    x_test, lower_data, upper_data, color="b", alpha=0.1, label="CI(95%)"
)
# Label the axis so that the panel can be read on its own.
ax[0].set_ylabel("y")
ax[0].legend(loc="upper right")
# Vertical line at the next input point returned by the active learning algorithm.
ax[0].axvline(x=opt[0])

# Bottom panel: expected improvement over the grid, with the same vertical line.
ax[1].plot(x_test, ego_data, "r", label="EGO")
ax[1].axvline(x=opt[0])
ax[1].set_xlabel("x")
ax[1].set_ylabel("expected improvement")
ax[1].legend()
plt.show()