# Bayesian Optimization based on GP

In [1]:
import numpy as np
import sklearn.gaussian_process as gp
from scipy.stats import norm
from scipy.optimize import minimize
from sklearn.gaussian_process.kernels import RBF, Matern
import math
import time
from copy import deepcopy
from functions import branin, EI, UCB, LP, HLP, compute_L
from multiprocessing import Process, Queue, Array

![Figure 1-1](Capture.png "Figure 1-1")

## Gaussian Process-based Bayesian Optimization, parallel implementation
- HLP and LP penalizers for synchronous and asynchronous BO
- Locally and globally estimated L-constants

In [4]:
# Same arguments as before, plus `bounds` to limit the optimization and `n_restarts` to allow multiple optimization attempts
def sample_next_parallel(acq_parallel, gaussian_process, X_eval, current_loss, bounds, find_min = True, n_restarts = 10,
                     acq_func = UCB, penalizer = HLP, local_L = True, X_under_eval = None):
    """Propose the next point by minimising the penalised acquisition function.

    L-BFGS-B is started from ``n_restarts`` points drawn uniformly inside
    ``bounds``; the minimiser with the lowest objective value wins.

    Parameters
    ----------
    acq_parallel : callable
        Objective handed to ``scipy.optimize.minimize``. It is called as
        ``acq_parallel(x, acq_func, penalizer, gaussian_process, X_eval,
        current_loss, find_min, n_params, bounds, local_L)``.
    gaussian_process : object
        Surrogate model, forwarded untouched to ``acq_parallel``.
    X_eval : array-like
        Forwarded (as a copy) to ``acq_parallel``.
    current_loss : array-like
        Forwarded (as a copy) to ``acq_parallel``.
    bounds : numpy.ndarray
        2-D array with one ``(lower, upper)`` row per parameter.
    find_min : bool
        Forwarded to ``acq_parallel``.
    n_restarts : int
        Number of random starting points for the local optimiser.
    acq_func, penalizer : callable
        Forwarded to ``acq_parallel``.
    local_L : bool
        Forwarded to ``acq_parallel``.
    X_under_eval : optional
        Currently unused; kept for interface compatibility.

    Returns
    -------
    numpy.ndarray or None
        ``result.x`` of the best restart, or ``None`` when no restart produced
        a value below +inf (e.g. ``n_restarts == 0`` or only NaN objectives).
    """
    X_best = None
    # Start from +inf rather than an arbitrary constant such as 999, so the
    # best restart is always kept regardless of the acquisition's scale.
    best_acq_val = np.inf
    n_params = bounds.shape[0]

    starting_points = np.random.uniform(bounds[:,0], bounds[:,1], size = (n_restarts, n_params))

    for point in starting_points:
        # `point` is already 1-D. minimize() expects a 1-D x0; newer SciPy
        # releases reject a multi-dimensional one, so it is not reshaped.
        # X_eval / current_loss are copied per restart so that the objective
        # cannot leak modifications from one restart into the next.
        result = minimize(acq_parallel, point, method = 'L-BFGS-B', bounds = bounds, args =
                          (acq_func, penalizer, gaussian_process, np.copy(X_eval), np.copy(current_loss),
                           find_min, n_params, bounds, local_L))

        if result.fun < best_acq_val:
            X_best = result.x
            best_acq_val = result.fun

    return X_best


def acq_parallel(X, acq_func, penalizer, gaussian_process, X_eval, current_loss, find_min, n_params, bounds, local_L):
    """Return the acquisition value at ``X`` multiplied by the penalizer value at ``X``.

    ``acq_func`` is called as
    ``acq_func(X, gaussian_process, current_loss, n_params, find_min)`` and
    ``penalizer`` as
    ``penalizer(X, gaussian_process, X_eval, current_loss, n_params, find_min, bounds, local_L)``;
    the two results are multiplied and returned.
    """
    # The acquisition is evaluated first, then the penalty; the product is the objective.
    return (
        acq_func(X, gaussian_process, current_loss, n_params, find_min)
        * penalizer(X, gaussian_process, X_eval, current_loss, n_params, find_min, bounds, local_L)
    )


# n_iters attempts to optimize the objective `function` within `bounds`, starting from n_init random points or from X_init
# alpha = variance of the error term of the GP, epsilon = precision tolerance for float comparisons
# three modes - standard (sequential evaluations), synchronous (new batches are evaluated with simultaneous start),
# and asynchronous (new points are evaluated before the batch is finished)
def bayesian_optimization(n_iters, function, bounds, acq_func = UCB, penalizer = LP, local_L = True,
                          X_init = None, n_init = 10, gp_params = None, find_min = True, alpha = 1e-5, epsilon = 1e-7):
    """Run GP-based Bayesian optimisation, evaluating one new point per iteration.

    Parameters
    ----------
    n_iters : int
        Number of optimisation iterations (one objective evaluation each).
    function : callable
        Objective; called with a single parameter vector.
    bounds : numpy.ndarray
        2-D array with one ``(lower, upper)`` row per parameter.
    acq_func, penalizer : callable
        Forwarded to ``sample_next_parallel``.
    local_L : bool
        Forwarded to ``sample_next_parallel``.
    X_init : iterable of parameter vectors, optional
        Initial design. When ``None``, ``n_init`` points are drawn uniformly
        inside ``bounds``.
    n_init : int
        Size of the random initial design (used only when ``X_init`` is None).
    gp_params : dict, optional
        Keyword arguments for ``GaussianProcessRegressor``. When ``None`` a
        Matern-kernel GP with ``alpha`` as noise term is built.
    find_min : bool
        Forwarded to ``sample_next_parallel``.
    alpha : float
        ``alpha`` of the default GP (ignored when ``gp_params`` is given).
    epsilon : float
        A proposal is treated as already sampled when every coordinate lies
        within ``epsilon`` of the same previously tested point.

    Returns
    -------
    (list, list)
        All tested points (initial design included) and their objective values.

    Notes
    -----
    ``X_eval`` is neither a parameter nor a local: it is looked up as a
    module-level global when the loop runs, and a ``NameError`` is raised if
    it has not been defined.
    """
    X_tested = []
    y_tested = []
    n_params = bounds.shape[0]

    if X_init is None:
        X_init = np.random.uniform(bounds[:,0], bounds[:,1], (n_init, n_params))

    for X in X_init:
        X_tested.append(X)
        y_tested.append(function(X))

    # creating the gaussian process
    if gp_params is not None:
        # Unpack the user-supplied settings (previously an undefined name `args`).
        gaussian_process = gp.GaussianProcessRegressor(**gp_params)
    else:
        kernel = Matern()
        gaussian_process = gp.GaussianProcessRegressor(
            kernel = kernel, alpha = alpha, n_restarts_optimizer = 10, normalize_y = True)

    for n in range(n_iters):

        X_np = np.array(X_tested)
        y_np = np.array(y_tested)

        gaussian_process.fit(X_np, y_np)

        # sampling of next point - can be done with random search (not implemented) or optimization of acq_func
        # NOTE(review): X_eval is a module-level global here - consider passing it in explicitly.
        X_next = sample_next_parallel(acq_parallel, deepcopy(gaussian_process), X_eval, y_np, bounds,
                                      find_min = find_min, n_restarts = 10,
                                      acq_func = acq_func, penalizer = penalizer, local_L = local_L, X_under_eval = None)

        # A proposal is a duplicate only if ALL of its coordinates match one
        # tested point. Matching on any single coordinate (the old check) would
        # flag e.g. every proposal sitting on a bound that was hit before.
        while np.any(np.all(np.abs(X_next - X_np) <= epsilon, axis = 1)):
            # Report the duplicate itself before replacing it.
            print(X_next, 'has already been sampled.\nIteration', n)
            X_next = sample_next_parallel(acq_parallel, deepcopy(gaussian_process), X_eval, y_np, bounds,
                                          find_min = find_min, n_restarts = 10,
                                          acq_func = acq_func, penalizer = penalizer, local_L = local_L, X_under_eval = None)

        y_next = function(X_next)
        X_tested.append(X_next)
        y_tested.append(y_next)

    return X_tested, y_tested

## Attempting to write logic for parallel - still to go

## Testing of implementations so far

In [6]:
# Manual check of sample_next_parallel: build a random initial design on
# `branin`, fit a GP to it and ask for one proposal.
function = branin
gp_params = None
X_tested = []
y_tested = []
bounds = np.array([[-5, 10], [0, 15]])
n_params = bounds.shape[0]
n_init = 20
X_init = None

# Draw the initial design uniformly inside the bounds when none is supplied
if X_init is None:
    X_init = np.random.uniform(bounds[:,0], bounds[:,1], (n_init, bounds.shape[0]))

for X in X_init:
    X_tested.append(X)
    y_tested.append(function(X))

X_np = np.array(X_tested)
y_np = np.array(y_tested)

# creating the gaussian process
if gp_params is not None:
    # Unpack the user-supplied settings (previously an undefined name `args`).
    gaussian_process = gp.GaussianProcessRegressor(**gp_params)

else:
    kernel = Matern()
    gaussian_process = gp.GaussianProcessRegressor(
        kernel = kernel, n_restarts_optimizer = 10, normalize_y = True)

gaussian_process.fit(X_tested, y_tested)

# Points handed to the penalizer as X_eval; this module-level name is also
# read by bayesian_optimization.
X_eval = np.array([[2, 2], [6, 6], [-4, 1], [-3, 12]])
sample_next_parallel(acq_parallel, gaussian_process, X_eval, y_np, bounds)

array([3.82635339, 0.54879798])

# Starting the program

In [None]:
# Full run: 200 iterations on `branin`, starting from 16 random initial points
X_tested, y_tested = bayesian_optimization(
    200,
    branin,
    bounds = np.array([[-5, 10], [0, 15]]),
    n_init = 16,
    acq_func = UCB,
)

# Report the lowest observed objective value, the input that produced it,
# and its position in the list of tested points
best_iter = np.argmin(y_tested)
best_value = np.min(y_tested)
best_X = X_tested[best_iter]
print(best_X, best_value, best_iter)
