In [1]:
from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction
import numpy as np
from scipy.stats import norm

import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

# Target Function

Lets create a target 1-D function with multiple local maxima to test and visualize how the [BayesianOptimization](https://github.com/fmfn/BayesianOptimization) package works. The target function we will try to maximize is the following:

$$f(x) = e^{-(x - 2)^2} + e^{-\frac{(x - 6)^2}{10}} + \frac{1}{x^2 + 1}, $$ its maximum is at $x = 2$ and we will restrict the interval of interest to $x \in (-2, 10)$.

Notice that, in practice, this function is unknown, the only information we have is obtained by sequentialy probing it at different points. Bayesian Optimization works by contructing a posterior distribution of functions that best fit the data observed and chosing the next probing point by balancing exploration and exploitation.

In [2]:
RHO_DEFAULT = 0.01
M_DEFAULT = 1

def target(x, y):
    xs = np.array([x,y])
    return np.array([np.sin(xs[0]) + xs[1]])

def constraint(x, y):
    z = np.array([x,y])
    return np.sin(z[0])*np.sin(z[1]) + 0.95


def u(x: np.array, z: np.array, y:np.array, rho = RHO_DEFAULT):
    """
    params
    z: np.array for constraint
    x: np.array for value of x from k+1 iteration
    y: np.array for lambda of lagrangian for kth iteration
    rho: convergence parameter rho
    M : some large number (hyperparameter) 
    """
    return target(x[0], x[1]) + q_i(z, x, y, rho, M = 1)
    
    

def h_i(x: np.array , z: np.array,  y:np.array, rho = RHO_DEFAULT, M = M_DEFAULT):
    """
    params
    z: np.array for constraint
    x: np.array for value of x from k+1 iteration
    y: np.array for lambda of lagrangian for kth iteration
    rho: convergence parameter rho
    M : some large number (hyperparameter) 
    """
    return np.int64(constraint(z[0], z[1]) > 0) + q_i(z, x, y, rho, M)

def q_i(x: np.array, z: np.array, y : np.float64, rho = 0.01, M = 1):
    return rho / (2*M) * (np.linalg.norm(x - z + y/rho) ** 2)

# def ei_constraint(z: np.array, x: np.array , theta: np.float64, rho = 0.01, M = 1):
#     theta = 

In [3]:
pbounds = {'x': (0, 6), 'y': (0, 6)}

# Create a BayesianOptimization Object

Enter the target function to be maximized, its variable(s) and their corresponding ranges. A minimum number of 2 initial guesses is necessary to kick start the algorithms, these can either be random or user defined.

In [4]:
t_optim = BayesianOptimization(
    f=target,
    pbounds=pbounds,
    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state=1,
)
c_optim = BayesianOptimization(
    f=constraint,
    pbounds=pbounds,
    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state=1,
)



#xi here is slack of utility function
utility = UtilityFunction(kind="ei", kappa=2.5, xi=1e-6)
# c_utility = UtilityFunction(kind="constraint", kappa=2.5, xi=1e-6)

In [5]:
next_point_to_probe = t_optim.suggest(utility)
print("Next point to probe is:", next_point_to_probe)

t_val = target(**next_point_to_probe)
print("Found the target value to be:", t_val)

Next point to probe is: {'x': 2.502132028215444, 'y': 4.321946960652949}
Found the target value to be: [4.91870969]


In [6]:
from scipy.optimize import minimize

def ADMMBO(pbounds : dict, target : callable, constraints : np.array, regulariser : callable, 
           num_constraints : int, num_inits_f : int, num_inits_constraint : np.array,
           init_ys : np.array = None, init_zs : np.array = None, init_xs : np.array = None,
           rho : float = 1e-2, M : int = 1, epsilon : np.float64 = 1e-2, max_iter = 3):
    
    #Renaming a bit
    n = num_inits_f
    m = num_inits_constraint
    
    assert num_constraints == num_inits_constraint.shape[0]
    
    dim_space = len(pbounds)
    
    utility = UtilityFunction(kind="ei", kappa=2.5, xi=1e-6)
    
    c_optims = []
    for i in range(num_constraints):
        c_optim = BayesianOptimization(
                #FEAS here is without the regularizer
                    f=constraints[i],
                    pbounds=pbounds,
                    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
                    random_state=1,
                )
        c_optims.append(c_optim)
    
    
    #Get initial points and evaluate
    
    #Initialise evaluation arrays
    F = np.zeros(n)
    Cs = np.zeros((num_constraints, np.max(m)))
    if not init_xs:
        init_xs = np.zeros((n, dim_space))
        #Generate the initial x points and evaluate all these points
        for i in range(n):
            #The problem is this outputs a dictionary ...
            x_i = t_optim.suggest(utility)
            init_xs[i] = list(x_i.values())
            F[i] = target(**x_i)
    else:
        assert init_xs.shape[0] == num_inits_f
        assert init_xs.shape[1] == dim_space
        
        for i in range(n):
            F[i] = target(init_xs[i])
    
    if not init_zs:
        init_zs = np.zeros((num_constraints, np.max(m), dim_space))
        # Generate but do not evaluate all the constraint points zs (as these are evaluated later)
        for j in range(num_constraints):
            for k in range(m[j]):
                c_optim = c_optims[j]
                z_i = c_optim.suggest(utility)
                init_zs[j,k] = list(z_i.values())
            
            #Now register the points (this is done behind the scenes with probe. 
            #Lazy = True just saves some computation by only calculating inverses when necessary)
            for k in range(m[j]):
                point = init_zs[j,k]
                Cs[j,k] = constraints[j](*point)
                c_optim.probe(
                        params = point,
                        lazy = True
                        )
    else:
        assert init_zs.shape[0] == num_constraints
        assert init_zs.shape[1] == np.max(m)
        assert init_zs.shape[2] == dim_space
        assert False, "not implemented yet"
    if not init_ys:
        init_ys = np.random.uniform(0, 1, size = num_constraints)
    else:
        assert init_ys.shape[0] == num_constraints, "number of initialised lambda values must be equal to number of constraints"
    
    ##MAIN LOOP
    
    solved = False
    k = 0
    xs = init_xs
    zs = init_zs
    ys = init_ys
    
    #Use first slice of zs as current best value of zs
    best_z = zs[:, 0, :]
    
    while k < max_iter and not solved:
        best_x, xs, F = run_opt(target, regulariser, xs, F, pbounds, best_z, ys)
        
        best_z, zs, Cs, ys, solved = run_feas(
            constraints=constraints, regulariser=regulariser, 
            z_mins_prev=best_z, m=m, x=best_x, 
            Cs=Cs, c_optims=c_optims, zs=zs, ys=ys, 
            pbounds=pbounds, rho=rho, M=M, epsilon=epsilon,
            max_iter=1)
        k+=1
    return best_x, best_z

#All good, does as expected
def run_opt(target: callable, regulariser : callable, 
            xs : np.array, F : np.array, pbounds : dict, zs : np.array, ys : np.array, max_iter = 1):

    assert len(ys) == len(zs)
    
    #We need a new t_optim function for each run because the posterior will be different based on each new z
    t_optim = BayesianOptimization(
            f=None,
            pbounds=pbounds,
            verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
            random_state=1,
        )

    U = np.zeros(len(xs))

    #This updates the GP posterior
    for i in range(xs.shape[0]):
        q_sum = np.sum(np.array([regulariser(xs[i], zs[j], ys[j]) for j in range(zs.shape[0])]))
        U[i] = F[i] + q_sum

        #This here registers and runs the optimisation step, 
        #would be better if I could make this lazy but requires editing underlying code
        
        try:
            t_optim.register(
                params = xs[i],
                target = U[i]
            )
        except:
            print(f"TARGET: we already have {xs[i]}, but it tried adding")
    
    utility = UtilityFunction(kind="ei", kappa=2.5, xi=1e-6) 
    for _ in range(max_iter):
        next_point = t_optim.suggest(utility)

        xs = np.concatenate((xs, np.array(list(next_point.values())).reshape(1, xs.shape[1])), axis = 0)
        
        F = np.concatenate((F, target(**next_point)))
        
        q_sum = np.sum(np.array([regulariser(xs[-1], zs[j], ys[j]) for j in range(zs.shape[0])]))
        U[i] = F[-1] + q_sum
        try:
            t_optim.register(
                params = xs[-1],
                target = U[-1]
            )
        except:
            print(f"TARGET: we already have {xs[i]}, but it tried adding")

    argmin = np.argmin(U)
    xmin = xs[argmin]

    del t_optim

    return (xmin, xs, F)
    
def run_feas(constraints : np.array, regulariser : callable, z_mins_prev : np.array, m : np.array,
            x : np.array, Cs : np.array, c_optims : np.array, zs : np.array, ys : np.array, 
            pbounds : dict, rho : float, M : int, epsilon : float, max_iter = 1):


    #Very unintuitive naming for rs and ss but it basically means the "r's" and "s's" 
    # used in the paper for later determining if we're done or not
    z_mins = np.zeros((zs.shape[0], zs.shape[2])); rs = np.zeros((zs.shape[0], zs.shape[2])); 
    ss = np.zeros((zs.shape[0], zs.shape[2]));

    #For each constraint
    for j in range(zs.shape[0]):

        c_optim = c_optims[j]

        #TODO: Could probably be done more efficiently
        #Evaluates H over all known points
        H = (np.int64(Cs[j] > 0)
                + np.array([regulariser(x, zs[j, a ,:], ys, rho, M) for a in range(zs.shape[1])]))
        h_plus = np.max(H)
        

        #Might be able to do this more efficiently because its just minimising 
        # some norm so set z = x + y and truncate when out of bounds
        utility = UtilityFunction(kind="constraint", kappa=2.5, xi=1e-6, 
                                  x_constraint = x, y_constraint = ys[j], rho = rho, M = M, h_plus = h_plus)

        for iteration in range(max_iter):
            #Add something to h
            #Could also use point = next_point
            point = zs[j, -1, :]
            h_eval = Cs[j, -1] - regulariser(x, point, ys[j], rho, M)
            try:
                c_optim.register(
                    params = point,
                    target = Cs[j, -1]
                )
            except:
                print(f"CONSTRAINT: tried adding {point} but looks like we already have that point")

            next_point = c_optim.suggest(utility)

            #If we're at the max number of constraint m, we need to concat, otherwise just assign
            if m[j] == np.max(m):
                zs = np.concatenate((zs, np.zeros((zs.shape[0], 1, zs.shape[2]))), axis = 1)
                Cs = np.concatenate((Cs, np.zeros((Cs.shape[0], 1))), axis = 1)
  
            zs[j, m[j]] = np.array(list(next_point.values()))
            Cs[j, m[j]] = constraints[j](*list(next_point.values()))
            
           
            H_new = int(Cs[j, -1] > 0) + regulariser(x, zs[j,-1,:], ys[j], rho, M)
            H = np.concatenate((H, np.array([H_new])))
        
        z_mins[j, :] = zs[j, np.argmin(H), :]
        rs[j, :] = x - z_mins[j, :] 
        ss[j, :] = -rho * (z_mins[j, :] - z_mins_prev[j, :])

    is_solved = (np.linalg.norm(rs) < epsilon) & (np.linalg.norm(ss) < epsilon)
    return z_mins, zs, Cs, ys, is_solved

        

In [8]:
constraints = np.array([constraint])
num_constraints = 1
num_inits_f = 3
num_inits_constraint = np.array([3])
regulariser = q_i

#Doesn't fkn converge ripppppp, they seem to diverge instead
print(ADMMBO(pbounds, target, constraints, regulariser, num_constraints, num_inits_f, num_inits_constraint,
           max_iter = 20))

CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding [0. 0.] but looks like we already have that point
CONSTRAINT: tried adding 