In [1]:
from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction
import numpy as np
from scipy.stats import norm

import matplotlib.pyplot as plt
from matplotlib import gridspec
%matplotlib inline

# Target Function

Lets create a target 1-D function with multiple local maxima to test and visualize how the [BayesianOptimization](https://github.com/fmfn/BayesianOptimization) package works. The target function we will try to maximize is the following:

$$f(x) = e^{-(x - 2)^2} + e^{-\frac{(x - 6)^2}{10}} + \frac{1}{x^2 + 1}, $$ its maximum is at $x = 2$ and we will restrict the interval of interest to $x \in (-2, 10)$.

Notice that, in practice, this function is unknown, the only information we have is obtained by sequentialy probing it at different points. Bayesian Optimization works by contructing a posterior distribution of functions that best fit the data observed and chosing the next probing point by balancing exploration and exploitation.

In [2]:
RHO_DEFAULT = 0.01
M_DEFAULT = 1

def target(x, y):
    return np.array([np.sin(x) + y])

def constraint(x, y):
    return np.sin(x)*np.sin(y) + 0.95


def u(x: np.array, z: np.array, y:np.array, rho = RHO_DEFAULT):
    """
    params
    z: np.array for constraint
    x: np.array for value of x from k+1 iteration
    y: np.array for lambda of lagrangian for kth iteration
    rho: convergence parameter rho
    M : some large number (hyperparameter) 
    """
    return target(x[0], x[1]) + q_i(z, x, y, rho, M = 1)
    
    

def h_i(x: np.array , z: np.array,  y:np.array, rho = RHO_DEFAULT, M = M_DEFAULT):
    """
    params
    z: np.array for constraint
    x: np.array for value of x from k+1 iteration
    y: np.array for lambda of lagrangian for kth iteration
    rho: convergence parameter rho
    M : some large number (hyperparameter) 
    """
    return np.int64(constraint(z[0], z[1]) > 0) + q_i(z, x, y, rho, M)

def q_i(x: np.array, z: np.array, y : np.array, rho = 0.01, M = 1):
    return rho / (2*M) * (np.linalg.norm(x - z + y/rho) ** 2)

# def ei_constraint(z: np.array, x: np.array , theta: np.float64, rho = 0.01, M = 1):
#     theta = 

In [3]:
pbounds = {'x': (0, 6), 'y': (0, 6)}

# Create a BayesianOptimization Object

Enter the target function to be maximized, its variable(s) and their corresponding ranges. A minimum number of 2 initial guesses is necessary to kick start the algorithms, these can either be random or user defined.

In [4]:
t_optim = BayesianOptimization(
    f=target,
    pbounds=pbounds,
    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state=1,
)
c_optim = BayesianOptimization(
    f=constraint,
    pbounds=pbounds,
    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state=1,
)



#xi here is slack of utility function
utility = UtilityFunction(kind="ei", kappa=2.5, xi=1e-6)
# c_utility = UtilityFunction(kind="constraint", kappa=2.5, xi=1e-6)

In [5]:
next_point_to_probe = t_optim.suggest(utility)
print("Next point to probe is:", next_point_to_probe)

t_val = target(**next_point_to_probe)
print("Found the target value to be:", t_val)

Next point to probe is: {'x': 2.502132028215444, 'y': 4.321946960652949}
Found the target value to be: [4.91870969]


In [6]:
from scipy.stats import norm

def ADMMBO(pbounds : dict, target : callable, constraints : np.array, regulariser : callable, 
           num_constraints : int, num_inits_f : int, num_inits_constraint : np.array,
           init_ys : np.array = None, init_zs : np.array = None, init_xs : np.array = None,
           rho : float = 1e-1, M : int = 20, epsilon : float = 0.05, max_iter_outer = 3,
           max_iter_OPT : int = 5, max_iter_FEAS : int = 5):
    
    #Renaming a bit
    n = num_inits_f
    m = num_inits_constraint
    
    assert num_constraints == num_inits_constraint.shape[0]
    
    dim_space = len(pbounds)
    
    utility = UtilityFunction(kind="ei", kappa=2.5, xi=1e-6)
    
    c_optims = []
    for i in range(num_constraints):
        c_optim = BayesianOptimization(
                #FEAS here is without the regularizer
                    f=constraints[i],
                    pbounds=pbounds,
                    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
                    random_state=1,
                )
        c_optims.append(c_optim)
    
    
    #Get initial points and evaluate
    
    #Initialise evaluation arrays
    F = np.zeros(n)
    Cs = np.zeros((num_constraints, np.max(m)))
    if not init_xs:
        init_xs = np.zeros((n, dim_space))
        #Generate the initial x points and evaluate all these points
        for i in range(n):
            #The problem is this outputs a dictionary ...
            x_i = t_optim.suggest(utility)
            init_xs[i] = list(x_i.values())
            F[i] = target(**x_i)
    else:
        assert init_xs.shape[0] == num_inits_f
        assert init_xs.shape[1] == dim_space
        
        for i in range(n):
            F[i] = target(init_xs[i])
    
    if not init_zs:
        init_zs = np.zeros((num_constraints, np.max(m), dim_space))
        # Generate but do not evaluate all the constraint points zs (as these are evaluated later)
        for j in range(num_constraints):
            for k in range(m[j]):
                c_optim = c_optims[j]
                z_i = c_optim.suggest(utility)
                init_zs[j,k] = list(z_i.values())
            
            #Now register the points (this is done behind the scenes with probe. 
            #Lazy = True just saves some computation by only calculating inverses when necessary)
            for k in range(m[j]):
                point = init_zs[j,k]
                Cs[j,k] = constraints[j](*point)
                c_optim.register(
                        params = point,
                        target = Cs[j,k]
                        )
            c_optim._gp.fit(c_optim._space.params, c_optim._space.target)
    else:
        assert init_zs.shape[0] == num_constraints
        assert init_zs.shape[1] == np.max(m)
        assert init_zs.shape[2] == dim_space
        assert False, "not implemented yet"
    if not init_ys:
        init_ys = np.random.uniform(0, 1, size = (num_constraints, dim_space))
    else:
        assert init_ys.shape[0] == num_constraints, "number of initialised lambda values must be equal to number of constraints"
    
    ##MAIN LOOP
    
    solved = False
    k = 0
    xs = init_xs
    zs = init_zs
    ys = init_ys
    
    #Use first slice of zs as current best value of zs
    best_z = zs[:, 0, :]
    
    while k < max_iter_outer and not solved:
        best_x, xs, F = run_opt(target, regulariser, xs, F, pbounds, best_z, ys, rho, max_iter = max_iter_OPT)
        
        best_z, zs, Cs, ys, c_optims, rho, solved = run_feas(
            constraints=constraints, regulariser=regulariser, 
            z_mins_prev=best_z, m=m, x=best_x, 
            Cs=Cs, c_optims=c_optims, zs=zs, ys=ys, 
            pbounds=pbounds, rho=rho, M=M, epsilon=epsilon,
            max_iter=max_iter_FEAS)
        k+=1
    
    if solved:
        return best_x, best_z, xs, F, zs, Cs
    else:
        return find_approx(xs=xs, F=F, zs=zs, Cs=Cs, c_optims=c_optims, pbounds = pbounds)
            

#All good, does as expected
def run_opt(target : callable, regulariser : callable, xs : np.array, F : np.array, 
            pbounds : dict, zs : np.array, ys : np.array, rho : float, max_iter = 1):

    assert len(ys) == len(zs)
    
    #We need a new t_optim function for each run because the posterior will be different based on each new z
    temp_optim = BayesianOptimization(
            f=None,
            pbounds=pbounds,
            verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
            random_state=1,
        )

    U = np.zeros(len(xs))

    #This updates the GP posterior
    for i in range(xs.shape[0]):
        q_sum = np.sum(np.array([regulariser(xs[i], zs[j], ys[j], rho = rho) for j in range(zs.shape[0])]))
        U[i] = F[i] + q_sum

        #This here registers and runs the optimisation step, 
        #would be better if I could make this lazy but requires editing underlying code
        
        try:
            temp_optim.register(
                params = xs[i],
                target = -U[i]
            )
        except:
            print(f"TARGET: we already have {xs[i]}, but it tried adding")
            assert False
    
    #Fit all the points added
    temp_optim._gp.fit(temp_optim._space.params, temp_optim._space.target)
    
    utility = UtilityFunction(kind="ei", kappa=2.5, xi=1e-6) 
    for _ in range(max_iter):
        next_point = t_optim.suggest(utility)

        xs = np.concatenate((xs, np.array(list(next_point.values())).reshape(1, xs.shape[1])), axis = 0)
        
        F = np.concatenate((F, target(**next_point)))
        
        q_sum = np.sum(np.array([regulariser(xs[-1], zs[j], ys[j], rho = rho) for j in range(zs.shape[0])]))
        U[i] = F[-1] + q_sum
        try:
            #Will add -U[-1], else it will try and maximise, but we want to minimise
            temp_optim.register(
                params = next_point,
                target = -U[-1]
            )
        except:
            print(f"TARGET: we already have {xs[i]}, but it tried adding")

    argmin = np.argmin(U)
    xmin = xs[argmin]
    
    del temp_optim
    return (xmin, xs, F)
    
def run_feas(constraints : np.array, regulariser : callable, z_mins_prev : np.array, m : np.array,
            x : np.array, Cs : np.array, c_optims : np.array, zs : np.array, ys : np.array, 
            pbounds : dict, rho : float, M : int, epsilon : float, max_iter = 1):


    #Very unintuitive naming for rs and ss but it basically means the "r's" and "s's" 
    # used in the paper for later determining if we're done or not
    z_mins = np.zeros((zs.shape[0], zs.shape[2])); rs = np.zeros((zs.shape[0], zs.shape[2])); 
    ss = np.zeros((zs.shape[0], zs.shape[2]));

    #For each constraint
    for j in range(zs.shape[0]):

        c_optim = c_optims[j]

        #TODO: Could probably be done more efficiently
        #Evaluates H over all known points
        H = (np.int64(Cs[j] > 0)
                + np.array([regulariser(x, zs[j, a ,:], ys[j], rho, M) for a in range(zs.shape[1])]))
        h_plus = np.min(H)
                

        #Might be able to do this more efficiently because its just minimising 
        # some norm so set z = x + y and truncate when out of bounds
        
        utility = UtilityFunction(kind="constraint", kappa=2.5, xi=1e-6, 
                                  x_constraint = x, y_constraint = ys[j], rho = rho, M = M, h_plus = h_plus)

        
        for iteration in range(max_iter):
            #Add something to h
            #If we're at the max number of constraint m, we need to concat, otherwise just assign
            
            next_point = c_optim.suggest(utility)
            
            if m[j] == np.max(m):
                zs = np.concatenate((zs, np.zeros((zs.shape[0], 1, zs.shape[2]))), axis = 1)
                Cs = np.concatenate((Cs, np.zeros((Cs.shape[0], 1))), axis = 1)
                m[j]+=1
  
            zs[j, m[j]-1] = np.array(list(next_point.values()))
            
            #THIS IS CLEARLY NOT WORKING, returning 0 when should be returning 0.95 for example
            Cs[j, m[j]-1] = constraints[j](*list(next_point.values()))

            H_new = int(Cs[j, -1] > 0) + regulariser(x, zs[j,-1,:], ys[j], rho, M)
            H = np.concatenate((H, np.array([H_new])))

            try:
                c_optim.register(
                    params = next_point,
                    target = Cs[j, -1]
#                     target = H_new
                )
            except:
                print(f"CONSTRAINT: tried adding {next_point} but looks like we already have that point")


            
        z_mins[j, :] = zs[j, np.argmin(H), :]
        ys[j, :] += rho * (x - z_mins[j, :])
        rs[j, :] = x - z_mins[j, :] 
        ss[j, :] = -rho * (z_mins[j, :] - z_mins_prev[j, :])
    
    is_solved = (np.linalg.norm(rs) < epsilon) & (np.linalg.norm(ss) < epsilon)
    
    mu = 10
    tao = 2
    
    #Changing rho as recommended by the paper where we have hardcoded mu and tao here
    if np.linalg.norm(rs) > mu * np.linalg.norm(ss):
        rho*=tao
    elif np.linalg.norm(rs) < mu * np.linalg.norm(ss):
        rho/=tao
    else:
        pass
        
    return z_mins, zs, Cs, ys, c_optims, rho, is_solved

def find_approx(xs : np.array, F : np.array, zs : np.array, Cs: np.array, 
                c_optims : np.array, pbounds : dict, delta : float = 0.1):
    
    #Just using this object to get the GP mean later
    final_optim = BayesianOptimization(
            f=None,
            pbounds=pbounds,
            verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
            random_state=1,
        )
        
    #Check through current xs to find candidate
    
    #Initialises
    F_min = np.inf
    approx_x_min = xs[0]
    
    #Sets the GP posterior
    for i in range(xs.shape[0]):
        final_optim.register(
            params = xs[i],
            target = F[i]
        )
        
        #Check if it's a candidate for a minimum
        if F[i] < F_min:
            
            #Check if high prob of satisifying constraint
            satisfied = 1
            for c_optim in c_optims:
                mean, std = c_optim._gp.predict(xs[i].reshape(1,-1), return_std=True)
                satisfied *= int(norm.cdf(-mean/std) > 1 - delta)
            
            #Only if all constraints are likely satisfied then update it as the minimum candidate
            if satisfied == 1:
                F_min = F[i]
                approx_x_min = xs[i]
    
    
    final_optim._gp.fit(final_optim._space.params, final_optim._space.target)
    
    #Check through zs
    feasibles = zs[np.where(Cs < 0)]
    for point in feasibles:
        #If the expected value of the function at this point is better than our current optimum, update
        E_f = final_optim._gp.predict(point.reshape(1,-1))
        if E_f < F_min:
            F_min = E_f
            approx_x_min = point

    #Return the best estimate and a bunch of debugging terms
    print(F_min)
    return approx_x_min, c_optims, xs, F, zs, Cs

In [7]:
constraints = np.array([constraint])
num_constraints = 1
num_inits_f = 20
num_inits_constraint = np.array([20])
regulariser = q_i

#Doesn't fkn converge ripppppp, they seem to diverge instead
answers = ADMMBO(pbounds, target, constraints, regulariser, num_constraints, num_inits_f, 
                 num_inits_constraint, rho = 0.1, max_iter_outer=10, max_iter_OPT=20, max_iter_FEAS=20)
x_min, c_optims, xs, F, zs, Cs = answers
print(x_min)

[0.39436013]
[4.62311052 1.39044669]


In [23]:
constraint(*x_min)

-0.02986295626429869

In [24]:
target(*x_min)

array([0.39442936])

In [10]:
constraint(4.7, 1.3)

-0.013484239614913518

In [11]:
target(4.7, 1.3)

array([0.30007674])

In [21]:
pots = xs[(F<1) & (xs[:,0] > 4) & (xs[:,1] <2)]


In [22]:
for point in pots:
    check = constraint(*point)
    print(check)
    if check < 0 :
        print(target(*point))
        print(point)

0.3920794692807217
0.33712742399249396
-0.030968838008299526
[0.40202036]
[4.80379604 1.39784564]
0.18335758957901294
0.5687325606875853
0.8628124293116438
-0.036614126623246035
[0.71745037]
[4.79162155 1.71431311]
0.6160217496354283
0.055865542080778985
0.8164179583349523
0.7448057699245958
0.1863314795295623
0.6322896519964898
0.7835783494011984
0.26718598267267946


In [14]:
zs[0][np.argmin(Cs, axis = 1)]

array([[4.70534404, 1.47141456]])

In [15]:
Cs.shape

(1, 220)

In [16]:
xs[np.argmin(F)]

array([4.96269283, 0.09011388])

[-0.01044753]
