In [10]:
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
import plotly.graph_objects as go
np.random.seed(42)


def Branin_function(x1,x2):
    """Evaluate the Branin function at the point(s) (x1, x2).

    The expression only uses arithmetic and ``np.cos``, so scalars and
    NumPy arrays of matching (broadcastable) shapes are both accepted;
    array inputs are evaluated elementwise.

    Parameters:
        x1: first coordinate(s)
        x2: second coordinate(s)
    Output: function value(s) at (x1, x2)
    """
    # Coefficients of the Branin function as fixed in this notebook
    a, r, s = 1.0, 6.0, 10.0
    b = 5.1/(4.0*(np.pi**2))
    c = 5.0/np.pi
    t = 1/(8.0*np.pi)

    # Squared "parabola" term plus the cosine ripple, shifted by s
    quadratic_term = a*(x2-b*(x1**2)+c*x1-r)**2
    cosine_term = s*(1-t)*np.cos(x1)
    return quadratic_term+cosine_term+s

In [11]:
# Number of points per axis of the evaluation grid built with np.linspace
n = 50

In [12]:
# Regular n-by-n evaluation grid: x1 spans [-5, 10], x2 spans [0, 15]
x1, x2 = np.meshgrid(
    np.linspace(-5, 10, n),
    np.linspace(0, 15, n),
)
# Objective value at every grid node
y = Branin_function(x1, x2)

In [13]:
# Shape check: y holds one function value per node of the (x1, x2) grid
y.shape

(50, 50)

In [14]:
# Interactive 3D surface of the Branin function over the evaluation grid
fig = go.Figure(data=[go.Surface(x=x1, y=x2, z=y, colorscale='viridis')])

# Fixed-size canvas with labelled axes
fig.update_layout(
    title='Branin function 3D Plot',
    scene={
        'xaxis': {'title': 'X Axis'},
        'yaxis': {'title': 'Y Axis'},
        'zaxis': {'title': 'Z Axis'},
    },
    autosize=False,
    width=800,
    height=600,
    margin={'l': 65, 'r': 50, 'b': 65, 't': 90},
)

# Render the figure
fig.show()

## Sample initialization


In [15]:
# Initial design: n_sample points whose coordinates are drawn at random
# (np.random.choice samples with replacement by default) from the grid
# values of each axis.
n_sample = 10
# Size the array from n_sample so that changing n_sample keeps shapes consistent
sample_x = np.zeros((n_sample,2))
sample_x[:,0] = np.random.choice(x1[0,:],n_sample)   # x1 axis values (first grid row)
sample_x[:,1] = np.random.choice(x2[:,0], n_sample)  # x2 axis values (first grid column)

In [16]:
# Inspect the initial design: one row per sample, columns are the x1 and x2 coordinates
sample_x

array([[ 6.63265306,  3.06122449],
       [ 3.57142857,  7.04081633],
       [-0.71428571, 10.71428571],
       [ 7.85714286, 11.93877551],
       [-2.85714286,  7.04081633],
       [ 1.12244898,  0.6122449 ],
       [ 6.63265306,  6.42857143],
       [ 0.51020408,  0.30612245],
       [ 1.73469388,  7.04081633],
       [-1.93877551, 13.16326531]])

## Gaussian process

In [8]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

# Gaussian process regressor with a Matern kernel
kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)
# normalize_y=True: the GP prior is zero-mean, whereas the targets are raw
# Branin values that are not centred on zero. scikit-learn standardises the
# targets internally and reverses the scaling before returning predictions,
# so the mean/std from predict() stay in the original units.
gp_model = GaussianProcessRegressor(kernel=kernel, normalize_y=True)

2

In [None]:
# Objective values observed at the sampled points
sample_y = Branin_function(sample_x[:, 0], sample_x[:, 1])

# Fit the Gaussian process model to the sampled points.
# sample_x is passed as is: one row per sample, one column per input dimension.
gp_model.fit(sample_x, sample_y)

# Candidate query points: every node of the evaluation grid, one (x1, x2) pair per row
x_range = np.column_stack([x1.ravel(), x2.ravel()])

# Generate predictions using the Gaussian process model
y_pred, y_std = gp_model.predict(x_range, return_std=True)

# Back to the grid layout so the predictions can be drawn as 2-D maps
y_pred_grid = y_pred.reshape(x1.shape)
y_std_grid = y_std.reshape(x1.shape)

# Plot the true function, the surrogate mean and the surrogate uncertainty side by side
fig_gp, axes = plt.subplots(1, 3, figsize=(18, 5), sharex=True, sharey=True)
panels = [
    (y, 'Branin Function'),
    (y_pred_grid, 'Gaussian Process Mean'),
    (y_std_grid, 'Gaussian Process Std'),
]
for ax, (values, title) in zip(axes, panels):
    contour = ax.contourf(x1, x2, values, levels=30, cmap='viridis')
    fig_gp.colorbar(contour, ax=ax)
    ax.scatter(sample_x[:, 0], sample_x[:, 1], color='red', label='Samples')
    ax.set_xlabel('x1')
    ax.set_ylabel('x2')
    ax.set_title(title)
axes[0].legend()
fig_gp.suptitle('Branin Function with Gaussian Process Surrogate Model')
plt.show()

# Acquisition function

In this section we explain the precise process or strategy for choosing the order of query points $x_{1:n}$. While random selection is an option, it's inefficient. Instead, various strategies in literature use the posterior model to guide the sequential search, determining the next query point $x_{n+1}$ based on $\mathcal{D}_n=\{(x_1,y_1),...,(x_n,y_n)\}$.

Acquisition functions are designed to balance between exploring new areas of the search space and exploiting the current promising areas.

Once an acquisition function is employed to identify the next hyperparameter, the actual score of this new hyperparameter based on the true objective function is acquired. Because the surrogate model has been trained on pairs of (hyperparameter, true objective function score), incorporating a new data point updates the surrogate model.

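In the following we consider a query point $x$, its corresponding function value $\nu = f(x)$ and the model hyperparameters $\theta$. A utility function $U(x, \nu, \theta)$ measures how useful it is to evaluate the objective at $x$ when the outcome is $\nu$. Since the outcome $\nu$ is unseen and the hyperparameters $\theta$ are unknown, we marginalize both of them; the acquisition function is the resulting expected utility, $\alpha(x;\mathcal{D}_n) = \mathbb{E}_{\theta}\,\mathbb{E}_{\nu \mid x, \theta}\left[U(x, \nu, \theta)\right]$. **TODO**: expand this explanation.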

#### PI acquisition function
The probability of improvement (PI) acquisition function is an improvement-based acquisition function, meaning that it favors points that are likely to improve upon a certain target $\tau$.
This function measures the probability that a point x leads to an improvement upon $\tau$. We can analytically compute this probability as follows:
$$
\alpha_{PI}(x;\mathcal{D}_n) := \mathcal{P}[\nu > \tau] = \Phi\left(\frac{\mu_n(x) - \tau}{\sigma_n(x)}\right)
$$

where $\Phi$ is the standard normal cumulative distribution function. As we can see from the formula PI considers the difference between the mean prediction and the current best value, taking into account the uncertainty in the surrogate model. $\alpha_{PI}(x;\mathcal{D}_n)$ is then maximized to select the next query point.

#### EI acquisition function
PI is often a good choice of acquisition function when the target is known; in general, however, the heuristic used for an unknown target causes PI to exploit quite aggressively.    
We can therefore consider a different acquisition function, the expected improvement (EI).    
This new criterion corresponds to a different utility function, the improvement function, denoted by $I(\text{x}, \nu,\theta)$, which is defined as follows: 
$$I(\text{x}, \nu,\theta)=(\nu-\tau)\mathbb{I}(\nu>\tau)$$
and it has positive value only if there is an improvement. Since $\nu$ is normally distributed, the *expectation of improvement* can be computed as:
$$\alpha_{EI}(\text{x},\mathcal{D}_n)=\mathbb{E}[I(\text{x}, \nu,\theta)]=(\mu_n(\text{x})-\tau)\Phi \left(\dfrac{\mu_n(\text{x})-\tau}{\sigma_n(\text{x})}\right)  + \sigma_n(\text{x})\phi\left(\dfrac{\mu_n(\text{x})-\tau}{\sigma_n(\text{x})}\right) $$
when $\sigma_n>0$ (it vanishes otherwise); in this expression:
- $\Phi$ is the standard normal cumulative distribution function
- $\phi$ is the standard normal probability density function.    
    
    

>*Note on the parameter $\tau$*:
>although the target value (the best reachable objective value) is often unknown, in practice $\tau$ is adaptively set to be the best observed value ($\tau=y^+=\max_{i=1:n}y_i$).


***This part below will need to be fixed dimensionally after we finish the implementation of the Gaussian Process model***

In [None]:
# Probability of improvement
def poi(x, gp_model, best_y):
    """Probability of improvement (PI) of the query points ``x`` over ``best_y``.

    Parameters:
        x: query points, passed unchanged to ``gp_model.predict``
        gp_model: fitted surrogate exposing ``predict(x, return_std=True)``
            that returns the posterior mean and standard deviation
        best_y: value to improve upon (tau), e.g. the best observed y
    Output: array with, for every query point, the probability that the
        function value exceeds best_y
    """
    mean, std = gp_model.predict(x, return_std=True)
    # Compare against the best_y argument (not a global) so the function is self-contained
    improvement = mean - best_y
    positive_std = std > 0
    # Divide by 1 instead of 0 where std is 0; those entries are replaced below
    z = improvement / np.where(positive_std, std, 1.0)
    # With zero std the outcome is deterministic: the probability of a strict
    # improvement is 1 if the mean exceeds best_y and 0 otherwise (never NaN)
    certain_outcome = np.where(improvement > 0, 1.0, 0.0)
    return np.where(positive_std, norm.cdf(z), certain_outcome)

In [None]:
# Expected improvement
def ei(x, gp_model, best_y):
    """Expected improvement (EI) of the query points ``x`` over ``best_y``.

    Parameters:
        x: query points, passed unchanged to ``gp_model.predict``
        gp_model: fitted surrogate exposing ``predict(x, return_std=True)``
            that returns the posterior mean and standard deviation
        best_y: max value of y (tau)
    Output: array with the expected improvement of every query point;
        where the standard deviation is 0 it equals max(mean - best_y, 0)
    """

    #Compute mean and std of y
    y_pred, y_std = gp_model.predict(x, return_std=True)
    improvement = y_pred - best_y
    has_spread = y_std > 0
    # Divide by 1 instead of 0 where std is 0; those entries are replaced below
    z = improvement / np.where(has_spread, y_std, 1.0)
    #Compute ei
    ei_values = improvement * norm.cdf(z) + y_std * norm.pdf(z)
    # With zero std the outcome is deterministic, so the expectation of
    # (nu - tau) * 1[nu > tau] is max(mean - best_y, 0). This avoids the NaN
    # (0/0) that otherwise appears at points where mean == best_y, e.g. at
    # the best observed sample.
    return np.where(has_spread, ei_values, np.maximum(improvement, 0.0))

In [None]:
# Locate the incumbent: the sampled point with the largest observed function value
# Dimensionally consistent when sample_y = Branin_function(sample_x[:,0],sample_x[:,1])
best_idx = np.argmax(sample_y)
best_x, best_y = sample_x[best_idx], sample_y[best_idx]

In [None]:
# Expected improvement of every candidate point.
# The result is not assigned back to `ei`: that would replace the function
# with an array, so re-running this cell (or calling ei again) would fail.
ei_values = ei(x_range, gp_model, best_y)