# Structured Bayesian Optimisation with Pyro
## Testing performance

Goal: compare the performance of the standard BO-GP, random GP and SBO-GP strategies at minimising the Branin-Hoo and hyperbolic functions with Pyro.

### Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

import torch
import torch.nn as nn
from torch.distributions import constraints, transform_to
import torch.optim as optim
import torch.autograd as autograd

import pyro
import pyro.distributions as dist
import pyro.contrib.gp as gp
from pyro.nn import PyroSample, PyroModule
from pyro.infer import autoguide, SVI, Trace_ELBO

In [2]:
print(torch.__version__)
print(pyro.__version__)

1.3.0.post2
1.0.0


### Setting random seed

In [3]:
def set_random_seed(seed):
    """ Seeds the NumPy and PyTorch random number generators with `seed`. """

    torch.manual_seed(seed)
    np.random.seed(seed)

    # the CUDA generators are only touched when a CUDA device is available
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

### Minimalistic (S)BO approach

#### Acquisition function

In [4]:
def lower_confidence_bound(model, x, kappa=2):
    r""" Lower Confidence Bound (LCB): $\alpha(x)=\mu(x) - \kappa\sigma(x)$

    `model(x)` must return a `(mean, variance)` pair; `kappa` scales the
    standard deviation that is subtracted from the mean.
    """
    # NOTE: the docstring is a raw string on purpose, otherwise "\a" in
    # "\alpha" is interpreted as the BEL control character.

    mu, variance = model(x)
    sigma = variance.sqrt()

    return mu - kappa * sigma

def normal_phi(x):
    """ Standard normal probability density function evaluated at tensor `x`. """
    return torch.exp(-x.pow(2)/2)/np.sqrt(2*np.pi)


def normal_Phi(x):
    """ Standard normal cumulative distribution function evaluated at tensor `x`. """
    return (1 + torch.erf(x / np.sqrt(2))) / 2


def expected_improvement(model, x):
    r""" Brooks' implementation of expected improvement (EI), negated.

    With $\delta = y_{min} - \mu(x)$ and $\sigma = \sigma(x)$ the value is
    $EI = \max(\delta, 0) + \sigma\phi(\delta/\sigma) - |\delta|\Phi(-|\delta|/\sigma)$,
    which equals the usual $\delta\Phi(\delta/\sigma) + \sigma\phi(\delta/\sigma)$.

    `model.y` holds the observed objective values and `model(x)` must return
    a `(mean, variance)` pair. The function returns `-EI`, so minimising the
    returned value maximises the expected improvement.
    """

    y_min = model.y.min()
    mu, variance = model(x)
    # guard against zero / slightly negative variances, which would turn
    # delta/sigma into inf or NaN
    sigma = variance.clamp_min(1e-12).sqrt()
    delta = y_min - mu
    z = delta / sigma
    # The CDF must be evaluated at -|z|: using +z here is only correct for
    # delta <= 0 and drives EI to ~0 exactly where the mean beats y_min.
    EI = delta.clamp_min(0.0) + sigma*normal_phi(z) - delta.abs()*normal_Phi(-z.abs())

    return -EI

def acquisition_func(model, x, af='EI'):
    """ Evaluates the acquisition function selected by `af` at `x`.

    `af` is either "EI" (expected improvement) or "LCB" (lower confidence
    bound). Any other value raises a ValueError instead of silently
    returning None, which would only fail later inside the optimiser.
    """

    if af == "EI":
        return expected_improvement(model, x)
    if af == "LCB":
        return lower_confidence_bound(model, x)

    raise ValueError("Unknown acquisition function %r (expected 'EI' or 'LCB')" % (af,))

#### Model training

In [5]:
def train(model, num_steps=1000, adam_params={"lr":0.1}):
    """ Fits the semi-parametric model with SVI.

    Returns a `(losses, guide)` pair: the loss reported by every SVI step and
    the autoguide that was optimised.
    """

    # TODO: check if param store needs to be cleared here.
    pyro.clear_param_store()

    # inference setup: multivariate normal autoguide over model.model,
    # Adam optimiser and the Trace_ELBO loss
    guide = autoguide.AutoMultivariateNormal(model.model)
    svi = SVI(model.model, guide, pyro.optim.Adam(adam_params), Trace_ELBO())

    # one loss value per gradient step
    losses = [svi.step() for _ in range(num_steps)]

    return losses, guide

def train_gp(model, num_steps=1000, adam_params={"lr":0.1}):
    """ Fits the GP model with `gp.util.train`.

    Only the "lr" entry of `adam_params` is used. Returns `(losses, None)`;
    the second element mirrors the guide slot returned by `train`.
    """

    adam = torch.optim.Adam(model.parameters(), lr=adam_params['lr'])
    elbo = pyro.infer.TraceMeanField_ELBO()
    losses = gp.util.train(
        model,
        adam,
        elbo.differentiable_loss,
        num_steps=num_steps)

    return losses, None

#### Finding new candidates

In [6]:
def find_a_candidate(model, x_init, constr, num_steps=1000, lr=0.1):
    """ Minimises the acquisition function starting from `x_init`.

    The search runs in unconstrained space: every column of `x_init` is mapped
    through the inverse of `transform_to(constr[i])`, optimised with Adam for
    `num_steps` steps and mapped back. Returns the detached constrained point.
    """

    def transf_values(values, constr, dims, inv_mode=False):
        """ Maps each column of `values` into the constrained domain, or into
        the unconstrained domain when `inv_mode` is True. """

        columns = []
        for i in range(dims):
            transform = transform_to(constr[i])
            if inv_mode:
                transform = transform.inv
            columns.append(transform(values[:, i]))

        return torch.stack(columns, dim=1)

    x_dims = x_init.shape[-1]

    # optimisation variable: the starting point in unconstrained coordinates
    start = transf_values(x_init, constr, x_dims, inv_mode=True)
    x_uncon = start.clone().detach().requires_grad_(True)

    # unconstrained minimiser
    # TODO: at the moment we are using torch optimizer, should we change to pyro?
    # (LBFGS with line_search_fn='strong_wolfe' was tried as an alternative.)
    minimizer = optim.Adam([x_uncon], lr=lr)

    def closure():
        minimizer.zero_grad()
        x_con = transf_values(x_uncon, constr, x_dims)
        value = acquisition_func(model, x_con)
        # gradient w.r.t. x_uncon only, then handed to backward on x_uncon
        # (presumably so that no gradients accumulate on model parameters)
        grads = autograd.grad(value, x_uncon)
        autograd.backward(x_uncon, grads)
        return value

    for _ in range(num_steps):
        minimizer.step(closure)

    return transf_values(x_uncon, constr, x_dims).detach()

In [7]:
def next_x(model, constr, num_candidates=5, num_steps=1000, lr=0.1):
    """ Finds the next best candidate on the acquisition function surface.

    Runs `find_a_candidate` from `num_candidates` starting points and returns
    the candidate with the lowest acquisition value. The first start is the
    most recent observation `model.X[-1:]`; later starts are drawn uniformly
    inside the bounds of `constr`, so every entry of `constr` must expose
    `lower_bound` and `upper_bound` (e.g. `constraints.interval`).
    """

    candidates = []
    values = []

    # start with last step
    x_init = model.X[-1:]
    for _ in range(num_candidates):

        x = find_a_candidate(model, x_init, constr, num_steps=num_steps, lr=lr)
        y = acquisition_func(model, x)

        candidates.append(x)
        values.append(y)

        # a new random initial point: one uniform draw per dimension, taken
        # from the constraint bounds rather than from module-level globals
        x_init = torch.stack(
            [x.new_empty(1).uniform_(float(c.lower_bound), float(c.upper_bound))
             for c in constr],
            dim=1)

    argmin = torch.min(torch.cat(values), dim=0)[1].item()

    return candidates[argmin]

#### Updating posterior

In [None]:
def update_posterior(model, obj_function, x_new, num_steps=1000, adam_params={"lr":0.1}, 
                    gp_mode=False):
    """ Adds the observation at `x_new` to the model and retrains it.

    `obj_function(x_new)` is appended to `model.y` and `x_new` to `model.X`.
    The model is then refitted with `train_gp` when `gp_mode` is True and with
    `train` otherwise. Returns the guide from the training routine
    (`train_gp` returns None in that slot).
    """

    # evaluate the objective at the new point
    y_new = obj_function(x_new)

    # extend the data set held by the model
    model.X = torch.cat([model.X, x_new])
    model.y = torch.cat([model.y, y_new])

    fit = train_gp if gp_mode else train
    _, guide = fit(model, num_steps=num_steps, adam_params=adam_params)

    return guide

### Helper functions

In [None]:
def plot_obj(obj_function):
    """ Draws a contour plot of `obj_function` with a colour bar.

    The function is evaluated on a 1000 x 1000 grid spanning
    [const_x1_min, const_x1_max] x [const_x2_min, const_x2_max].
    """

    grid_size = 1000
    levels = 200

    axis_1 = torch.linspace(const_x1_min, const_x1_max, grid_size)
    axis_2 = torch.linspace(const_x2_min, const_x2_max, grid_size)
    mesh_1, mesh_2 = torch.meshgrid(axis_1, axis_2)

    # evaluate on the flattened grid, one (x1, x2) pair per row
    grid_points = torch.stack((mesh_1.flatten(), mesh_2.flatten()), dim=1)
    heights = obj_function(grid_points).reshape(grid_size, grid_size)

    plt.contour(
        mesh_1.detach().numpy(),
        mesh_2.detach().numpy(),
        heights.detach().numpy(),
        levels)

    plt.colorbar()

In [None]:
def find_best_solution(xmins, target_lms):
    """ Returns the point of `xmins` closest to any point of `target_lms`.

    The result is a `(point, distance)` pair using the Euclidean norm;
    `(None, np.inf)` is returned when there is nothing to compare.
    """

    best = (None, np.inf)

    for candidate in xmins:
        for target in target_lms:
            gap = np.linalg.norm(candidate - target)
            # strict comparison: the first of several equally close points wins
            if gap < best[1]:
                best = (candidate, gap)

    return best

# Branin-Hoo example

In [None]:
# Search box: bounds of the two input dimensions
const_x1_min, const_x1_max = -5, 10
const_x2_min, const_x2_max = 0, 15

# Creating constraints: one interval per input dimension
constr = [
    constraints.interval(lower, upper)
    for lower, upper in (
        (const_x1_min, const_x1_max),
        (const_x2_min, const_x2_max),
    )
]

def branin_hoo(x):
    """ Branin-Hoo function with its constants fixed inside the function.

    `x` holds the two coordinates along its last axis; the result drops
    that axis.
    """
    a, r, s = 1.0, 6.0, 10.0
    b = 5.1 / (4 * np.pi**2)
    c = 5.0 / np.pi
    t = 1.0 / (8 * np.pi)

    x1, x2 = x[...,0], x[...,1]

    quadratic_term = a * (x2 - b*x1**2 + c*x1 - r)**2
    cosine_term = s*(1 - t)*torch.cos(x1)
    return quadratic_term + cosine_term + s


# Minimisers of Branin-Hoo as a NumPy array, one (x1, x2) pair per row
branin_hoo_lms_np = np.array([
    [-math.pi, 12.275],
    [math.pi, 2.275],
    [9.42478, 2.475]])

# The same points as a torch tensor
branin_hoo_lms = torch.tensor([
    [-math.pi, 12.275],
    [math.pi, 2.275],
    [9.42478, 2.475]])

# Checking LMs: the objective must equal 0.397887 at every listed minimiser
assert np.allclose(
    branin_hoo(branin_hoo_lms).numpy(),
    np.full(3, 0.397887, dtype=np.float32),
    rtol=1e-6)

In [None]:
# Contour plot of the Branin-Hoo objective over the search box
plot_obj(branin_hoo)

In [None]:
# Fix the NumPy / PyTorch seeds before any random draws are made below
set_random_seed(123)

## Parameters

In [None]:
# Training points: N_train uniform draws inside the search box.
# The scale is derived from the bounds instead of a hard-coded 15, so the
# samples stay inside the box if the bounds are ever changed.
N_train = 10
_x_lower = torch.FloatTensor([const_x1_min, const_x2_min])
_x_upper = torch.FloatTensor([const_x1_max, const_x2_max])
X_train = torch.rand(N_train, 2)*(_x_upper - _x_lower) + _x_lower
y_train = branin_hoo(X_train)

# Optimiser parameters
adam_num_steps = 1000
adam_params={"lr": 0.1}

# Number of starting points tried per acquisition-function minimisation
num_candidates = 10

# Number of BO iterations per test run
bo_steps = 20

# Number of independent test runs
num_tests = 20

## BO approach

In [None]:
import os

# Search points of every successfully completed test run, one array per run
bo_tests_search_points = []

# np.savetxt does not create missing directories; without this every run
# would fail at the save step and no results would be collected.
os.makedirs("pyro_results", exist_ok=True)

for test_i in range(num_tests):
    try:
        print("TEST: ", test_i+1)

        pyro.clear_param_store()

        gp_model_svi = gp.models.GPRegression(X_train, y_train, 
                                              gp.kernels.Matern52(input_dim=X_train.shape[1], 
                                                                  lengthscale=100*torch.ones(X_train.shape[1])))

        # Set priors
        gp_model_svi.kernel.lengthscale = pyro.nn.PyroSample(dist.LogNormal(3, 1).expand([2]).to_event())
        gp_model_svi.kernel.variance = pyro.nn.PyroSample(dist.LogNormal(5, 2))
        gp_model_svi.noise = pyro.nn.PyroSample(dist.LogNormal(0, 1))

        # Set guides
        gp_model_svi.kernel.autoguide("lengthscale", dist.Normal)
        gp_model_svi.kernel.autoguide("variance", dist.Normal)
        gp_model_svi.autoguide("noise", dist.Normal)

        # optimise
        losses, _ = train_gp(gp_model_svi, num_steps=adam_num_steps, adam_params=adam_params)

        # one row per BO step: the point proposed at that step
        xmins = np.zeros([bo_steps, 2], np.float32)

        for i in range(bo_steps):
            xmin = next_x(gp_model_svi, constr, num_candidates=num_candidates, num_steps=adam_num_steps)

            update_posterior(gp_model_svi, branin_hoo, xmin, 
                             num_steps=adam_num_steps, adam_params=adam_params, gp_mode=True)

            xmins[i] = xmin.detach().numpy()

            closest_point, closest_dist = find_best_solution([xmins[i]], branin_hoo_lms_np)
            print("  BO STEP: ", i+1, "xmin:", xmins[i], " distance: ", closest_dist)

        # saving results for the run
        np.savetxt("pyro_results/pyro_bo_%d.out" % (test_i), xmins, delimiter=',')

        closest_point, closest_dist = find_best_solution(xmins, branin_hoo_lms_np)
        print("  Best candidate: ", closest_point, " distance: ", closest_dist)

        bo_tests_search_points.append(xmins)
    except Exception as err:
        # A failed run is skipped so the remaining runs still execute, but the
        # failure is reported instead of being swallowed. KeyboardInterrupt is
        # not an Exception subclass, so the loop can still be interrupted.
        print("  TEST ", test_i+1, " failed: ", repr(err))
        continue
