In [1]:
import torch
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import norm

import gpytorch
from gpytorch.models import ExactGP
from gpytorch.likelihoods import DirichletClassificationLikelihood
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel



In [2]:
# GP class for regression
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# GP class for classification
class DirichletGPModel(ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_classes):
        super(DirichletGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean(batch_shape=torch.Size((num_classes,)))
        self.covar_module = ScaleKernel(
            RBFKernel(batch_shape=torch.Size((num_classes,))),
            batch_shape=torch.Size((num_classes,)),
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# function to optimize parameters of the classification GP - 
def train_cls_gp(model, likelihood, train_x, training_iter):
   # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    for i in range(training_iter):
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, likelihood.transformed_targets).sum()
        loss.backward()
        if i + 1 == training_iter:
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, training_iter, loss.item(),
                model.covar_module.base_kernel.lengthscale.mean().item(),
                model.likelihood.second_noise_covar.noise.mean().item()
            ))
        optimizer.step()

    return model, likelihood

# function to optimize parameters of the regression GP -
def train_reg_gp(model, likelihood, train_x, train_y, training_iter):
   # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    for i in range(training_iter):
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, train_y)
        loss.backward()
        if i + 1  == training_iter:
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, training_iter, loss.item(),
                model.covar_module.base_kernel.lengthscale.item(),
                model.likelihood.noise.item()
            ))
        optimizer.step()

    return model, likelihood 

In [3]:
# generates training data
def gen_data(num_data, seed = 2024):
    torch.random.manual_seed(seed)

    x = torch.randn(num_data,1)
    y = torch.randn(num_data,1)

    # u = torch.rand(1)
    data_fn_obj = lambda x,y: torch.cos(2*x)*torch.cos(y) + torch.sin(x) # toy function
    latent_fn_obj = data_fn_obj(x, y)
    z_obj = latent_fn_obj.squeeze()
    data_fn_cnt = lambda x, y: torch.cos(x)*torch.cos(y) - torch.sin(x)*torch.sin(y)
    latent_fn_cnt = data_fn_cnt(x, y)
    z_cnt = torch.heaviside(latent_fn_cnt - 0.5, torch.zeros(latent_fn_cnt.shape)).long().squeeze()
    return torch.cat((x,y),dim=1), z_obj, z_cnt, data_fn_obj, data_fn_cnt

# generate training data - y= objective values, c=class label
train_x, train_y, train_c, genfn_obj, genfn_cnt = gen_data(50)

#initialize likelihood and model - regression
reg_likelihood = gpytorch.likelihoods.GaussianLikelihood()
reg_model = ExactGPModel(train_x, train_y, reg_likelihood)

# initialize likelihood and model - we let the DirichletClassificationLikelihood compute the targets for us
cls_likelihood = DirichletClassificationLikelihood(train_c, learn_additional_noise=True)
cls_model = DirichletGPModel(train_x, cls_likelihood.transformed_targets, cls_likelihood, num_classes=cls_likelihood.num_classes)

# Train regression and classification model
training_iter = 50
print("Training Regression model")
reg_model, reg_likelihood = train_reg_gp(reg_model, reg_likelihood, train_x, train_y, training_iter)
print("Training Classification model")
cls_model, cls_likelihood = train_cls_gp(cls_model, cls_likelihood, train_x, training_iter)

Training Regression model
Iter 50/50 - Loss: -0.396   lengthscale: 1.125   noise: 0.007
Training Classification model
Iter 50/50 - Loss: 4.470   lengthscale: 0.862   noise: 0.582


In [4]:
# test data generate - uniform across [-3, 3] x [-3, 3]
test_d1 = np.linspace(-3, 3, 20)
test_d2 = np.linspace(-3, 3, 20)

test_x1_mat, test_x2_mat = np.meshgrid(test_d1, test_d2)
test_x1_mat, test_x2_mat = torch.Tensor(test_x1_mat), torch.Tensor(test_x2_mat)

test_x = torch.cat((test_x1_mat.view(-1,1), test_x2_mat.view(-1,1)), dim=1)

In [5]:
def calculate_acf(pred_mean, pred_std, y_max):
    improve = y_max - pred_mean
    z_score = np.divide(improve, pred_std + 1e-9)
    acf = np.multiply(improve, norm.cdf(z_score)) + np.multiply(pred_std, norm.pdf(z_score))
    return acf


In [6]:
eval_budget = 50
num_feval = 0
bo_batch_size = 2

#store BO iteration results
obj_history = []
top_ind_history = []
feasibility_history = [] 

while num_feval < eval_budget :
    # prediction - objective values
    reg_model.eval()
    reg_likelihood.eval()

    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        obj_pred = reg_likelihood(reg_model(test_x))
        obj_pred_means = obj_pred.loc
        obj_pred_stddev = obj_pred.stddev
    
     #prediction - class probabilities
    cls_model.eval()
    cls_likelihood.eval()

    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        logit_dist = cls_model(test_x)
        logit_means = logit_dist.loc
        logit_stddevs = logit_dist.stddev

    # calculate probabilites from the logit values
    samples = logit_dist.sample(torch.Size((256,))).exp()
    class_probabilites = (samples / samples.sum(-2, keepdim=True)).mean(0)

    best_idx = np.argmin(train_y)
    y_best = train_y[best_idx]
    print("best obj value in training so far = ", y_best)

    ei = calculate_acf(obj_pred_means, obj_pred_stddev, y_best)
    con_ei = class_probabilites[0] * ei

    # top 5 candidate points from constrained ei
    top5_ind = np.argpartition(con_ei, -bo_batch_size)[-bo_batch_size:]
    top5_ind = top5_ind[np.argsort(con_ei[top5_ind])]
    print("top candidate indices = ", top5_ind)

    #locations of top candidate points
    candidate_x = test_x[top5_ind, :]

    top5_obj_vals = genfn_obj(candidate_x[:,0], candidate_x[:, 1])

    num_feval += bo_batch_size

    #store results
    obj_history.append(top5_obj_vals)
    top_ind_history.append(top5_ind)
    feasibility_history.append(class_probabilites[0][top5_ind])

    #update training data set
    train_x = torch.cat((train_x, candidate_x), dim=0)
    train_y = torch.cat((train_y, top5_obj_vals), dim=0)
    train_c = torch.cat((train_c, class_probabilites.max(0)[1][top5_ind]), dim=0)
    print('re-training the model')

    #update GP
    reg_likelihood = gpytorch.likelihoods.GaussianLikelihood()
    reg_model = ExactGPModel(train_x, train_y, reg_likelihood)
    cls_likelihood = DirichletClassificationLikelihood(train_c, learn_additional_noise=True)
    cls_model = DirichletGPModel(train_x, cls_likelihood.transformed_targets, cls_likelihood, num_classes=cls_likelihood.num_classes)

    reg_model, reg_likelihood = train_reg_gp(reg_model, reg_likelihood, train_x, train_y, training_iter)
    cls_model, cls_likelihood = train_cls_gp(cls_model, cls_likelihood, train_x, training_iter)
    print('======= BO Batch done ========')
    



best obj value in training so far =  tensor(-1.7209)
top candidate indices =  tensor([184, 204])
re-training the model
Iter 50/50 - Loss: -0.423   lengthscale: 1.137   noise: 0.007
Iter 50/50 - Loss: 4.455   lengthscale: 0.822   noise: 0.390
best obj value in training so far =  tensor(-1.9198)
top candidate indices =  tensor([205, 204])
re-training the model
Iter 50/50 - Loss: -0.458   lengthscale: 1.140   noise: 0.007
Iter 50/50 - Loss: 4.442   lengthscale: 0.777   noise: 0.256
best obj value in training so far =  tensor(-1.9324)
top candidate indices =  tensor([205, 204])
re-training the model
Iter 50/50 - Loss: -0.494   lengthscale: 1.138   noise: 0.007
Iter 50/50 - Loss: 4.404   lengthscale: 0.746   noise: 0.169
best obj value in training so far =  tensor(-1.9324)
top candidate indices =  tensor([205, 204])
re-training the model
Iter 50/50 - Loss: -0.528   lengthscale: 1.136   noise: 0.007
Iter 50/50 - Loss: 4.356   lengthscale: 0.727   noise: 0.128
best obj value in training so fa



Iter 50/50 - Loss: 3.770   lengthscale: 0.636   noise: 0.054
best obj value in training so far =  tensor(-1.9324)
top candidate indices =  tensor([204, 205])
re-training the model
Iter 50/50 - Loss: -0.898   lengthscale: 1.121   noise: 0.007
Iter 50/50 - Loss: 3.742   lengthscale: 0.634   noise: 0.051
best obj value in training so far =  tensor(-1.9324)
top candidate indices =  tensor([204, 205])
re-training the model
Iter 50/50 - Loss: -0.913   lengthscale: 1.121   noise: 0.007
Iter 50/50 - Loss: 3.716   lengthscale: 0.633   noise: 0.048
best obj value in training so far =  tensor(-1.9324)
top candidate indices =  tensor([204, 205])
re-training the model
Iter 50/50 - Loss: -0.927   lengthscale: 1.121   noise: 0.007
Iter 50/50 - Loss: 3.690   lengthscale: 0.632   noise: 0.045
best obj value in training so far =  tensor(-1.9324)
top candidate indices =  tensor([204, 205])
re-training the model
Iter 50/50 - Loss: -0.941   lengthscale: 1.120   noise: 0.007
Iter 50/50 - Loss: 3.665   lengt