# Bayesian Optimization based on GP

In [2]:
import numpy as np
import sklearn.gaussian_process as gp
from scipy.stats import norm
from scipy.optimize import minimize
from sklearn.gaussian_process.kernels import RBF

## Gaussian Process-based Bayesian Optimization implemented
- Starting with the expected improvement (EI) acquisition function
- Upper confidence bound (UCB) to be added later

In [1]:
# EI takes the candidate x-values, the Gaussian process object, the losses evaluated so far, and the number of parameters
# find_min: boolean selecting minimization (True) or maximization (False)
def EI(X, gp, current_loss, n_params, find_min = True):
    """Return the negative expected improvement (EI) at the candidate points.

    The result is negated so that the acquisition function can be passed
    directly to a minimizer.

    Parameters
    ----------
    X : array-like
        Candidate point(s); reshaped to ``(-1, n_params)`` before prediction.
    gp : object
        Fitted model exposing ``predict(X, return_std=True)`` returning a
        ``(mean, std)`` pair.
    current_loss : array-like
        Objective values observed so far; the best one is the incumbent.
    n_params : int
        Number of columns (parameters) per candidate point.
    find_min : bool
        True when the objective is being minimized (incumbent is the minimum
        of ``current_loss``), False when it is being maximized.

    Returns
    -------
    numpy.ndarray
        1-D array holding ``-EI`` for every candidate point. Points whose
        predictive standard deviation is exactly zero get a value of 0.
    """
    X_pred = np.asarray(X).reshape(-1, n_params)

    # The per-point standard deviation is required here; return_cov would
    # give the full covariance matrix and break the element-wise formula.
    mu, std = gp.predict(X_pred, return_std=True)

    # Flatten both so a column-shaped mean cannot broadcast against the 1-D
    # std into an (n, n) matrix.
    mu = np.asarray(mu, dtype=float).reshape(-1)
    std = np.asarray(std, dtype=float).reshape(-1)

    best_loss = np.min(current_loss) if find_min else np.max(current_loss)

    # Orient the improvement so that "better than the incumbent" is positive
    # for both minimization and maximization.
    sign_X = -1.0 if find_min else 1.0
    improvement = sign_X * (mu - best_loss)

    # Zero std yields inf (x/0) or nan (0/0); both are overwritten below, so
    # silence the corresponding warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        norm_X = improvement / std
        ei = improvement * norm.cdf(norm_X) + std * norm.pdf(norm_X)

    # Exclude points with no standard deviation (likely already been tested).
    ei[std == 0] = 0
    return (-1) * ei


# Takes the acquisition function and the gp/current_loss/find_min arguments forwarded to it; bounds limit the optimization and n_restarts allows multiple optimization attempts
def sample_next_point(acq_func, gp, current_loss, find_min = True, bounds = (0, 1), n_restarts = 10):
    """Propose the next point to evaluate by minimizing the acquisition function.

    L-BFGS-B is started from ``n_restarts`` points drawn uniformly inside
    ``bounds``; the end point with the lowest acquisition value wins.

    Parameters
    ----------
    acq_func : callable
        Called as ``acq_func(x, gp, current_loss, n_params, find_min)`` and
        expected to return the value to minimize.
    gp : object
        Model forwarded unchanged to ``acq_func``.
    current_loss : array-like
        Observed objective values, forwarded unchanged to ``acq_func``.
    find_min : bool
        Forwarded unchanged to ``acq_func``.
    bounds : array-like
        ``(lower, upper)`` pair per parameter, shape ``(n_params, 2)``. A flat
        ``(lower, upper)`` pair is treated as a single parameter.
    n_restarts : int
        Number of random starting points.

    Returns
    -------
    numpy.ndarray or None
        Best point found, shape ``(n_params,)``. None when no run produced a
        finite acquisition value (e.g. ``n_restarts == 0``).
    """
    # Accept both the tuple default and an (n_params, 2) array.
    bounds = np.asarray(bounds, dtype=float).reshape(-1, 2)
    n_params = bounds.shape[0]

    X_best = None
    best_acq_val = np.inf

    # One row per restart so that iterating yields full parameter vectors.
    starting_points = np.random.uniform(
        bounds[:, 0], bounds[:, 1], size=(n_restarts, n_params)
    )
    for point in starting_points:
        # minimize expects a 1-D starting vector.
        result = minimize(
            acq_func,
            point,
            method='L-BFGS-B',
            bounds=bounds,
            args=(gp, current_loss, n_params, find_min),
        )

        # The acquisition function may return a one-element array.
        acq_val = float(np.min(result.fun))
        if acq_val < best_acq_val:
            X_best = result.x
            best_acq_val = acq_val

    return X_best


# Run n_iters attempts to optimize the objective `function` within `bounds`, starting from X_init or, if X_init is None, from n_init random points
# alpha = variance of the error term of the GP, epsilon = precision tolerance for floats
def bayesian_optimization(n_iters, function, bounds, X_init = None, n_init = 10, gp_params = None, alpha = 1e-5, epsilon = 1e-7):
    
    X_tested = []
    y_tested = []
    n.params = bounds.shape[0]
    
    if X_init is None:
        X_init = np.random.uniform(bounds[:,0], bounds[:,1], (n_init, bounds.shape[0]))
    
    for X in X_init:
        X_tested.append(X)
        y_tested.append(function(X))
    
    X_tested = np.array(X_tested)
    y_tested = np.array(y_tested)
    
    # creating the gaussian process
    gp = 
    