**Multi-objective optimization with feasibility aware Expected Hyper-volume Improvement**

In [1]:
import pandas as pd
import numpy as np
import torch
import botorch
import gpytorch
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from botorch.models.gp_regression import SingleTaskGP
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.transforms.outcome import Standardize
from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
from botorch.utils.transforms import unnormalize, normalize

In [3]:
from botorch.optim.optimize import optimize_acqf, optimize_acqf_list
from botorch.acquisition.objective import GenericMCObjective
from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization
from botorch.utils.multi_objective.box_decompositions.non_dominated import (
    FastNondominatedPartitioning,
)
from botorch.acquisition.multi_objective.monte_carlo import (
    qExpectedHypervolumeImprovement,
    qNoisyExpectedHypervolumeImprovement,
)
from botorch.utils.sampling import sample_simplex

In [4]:
from botorch import fit_gpytorch_mll
from botorch.exceptions import BadInitialCandidatesWarning
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.utils.multi_objective.box_decompositions.dominated import (
    DominatedPartitioning,
)
from botorch.utils.multi_objective.pareto import is_non_dominated

Reading input data

In [5]:
filename = r'../data/olhs_run1.xlsx'
x_pd = pd.read_excel(filename, sheet_name='Initial Design (OLHS)', header=[0,1], index_col=[0])
y_pd = pd.read_excel(filename, sheet_name='bo_data', header=[0,1], index_col=[0])

# no normalization yet

dtype=torch.double

# temporarily take out some data for validation
validation_idx = [1,7,15]

train_x_pd = x_pd.drop(validation_idx)
train_y_pd = y_pd.drop(validation_idx)

# which properties to read from labels
objective_properties = ['Polymer Solubility', 'Gelation Enthalpy', 'Shear Modulus']

# make torch tensors 
train_x = torch.tensor(train_x_pd.values, dtype=dtype)
train_y_list = []
for prop in objective_properties:
    train_y_list.append(
        torch.tensor(train_y_pd[prop].values, dtype=dtype)
    )
train_mfg_labels = torch.tensor(train_y_pd['Manufacturability'].values, dtype=torch.long).squeeze()

bounds = np.array([[2000, 10000], [0, 100], [0, 40], [5000, 15000], [80, 100], [0,100], [60, 100], [70, 100]])
bounds = torch.tensor(bounds.T, dtype=dtype)

# Normalize input parameters
train_x = normalize(train_x, bounds=bounds) # normalized to unit hypercube - all values betn 0 and 1

Define and initialize classification model

In [6]:
# define classification model using gpytorch
#class definition
class DirichletGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_classes):
        super(DirichletGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size((num_classes,)))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size((num_classes,))),
            batch_shape=torch.Size((num_classes,)),
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# function to optimize parameters of the classification GP - 
def train_cls_gp(model, likelihood, train_x, training_iter):
   # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    for i in range(training_iter):
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, likelihood.transformed_targets).sum()
        loss.backward()
        optimizer.step()

    return model, likelihood

In [7]:
#initialize classification model 
cls_likl = gpytorch.likelihoods.DirichletClassificationLikelihood(train_mfg_labels, learn_additional_noise=False, alpha_epsilon=1e-4)
cls_model = DirichletGPModel(train_x, cls_likl.transformed_targets.double(), cls_likl, num_classes=cls_likl.num_classes)

Define  and initalzie Regression models

In [11]:
models = []
for data in train_y_list:
    models.append(
        SingleTaskGP(train_x, data, outcome_transform=Standardize(m=1))
    )
model = ModelListGP(*models)
mll = SumMarginalLogLikelihood(model.likelihood, model)

Define Acquisition function helper functions

$$constrained \hspace{3mm} acf = P * acf$$

In [45]:
def constraint_func(X):
    #calculate probability of class 1 for X
    cls_likl.eval()
    cls_model.eval()
    with torch.no_grad():
        logit_dist = cls_model(X)
    samples = logit_dist.sample(torch.Size((256,))).exp()
    class_probs = (samples / samples.sum(-2, keepdim=True)).mean(0)
    feasible_class_probs = class_probs[1]

    threshold = 0.7
    feasibility_condition = threshold - feasible_class_probs    #negative value implies feasibility
    
    return feasibility_condition.view(-1,1)

In [20]:
# following the botorch tutorial
BATCH_SIZE = 4      # Number of candidates selected in each BO run/iteration
NUM_RESTARTS = 10   # Restarts during BO run
RAW_SAMPLES = 512   

# function to optimize acquisition function
standard_bounds = torch.zeros(2, 8)
standard_bounds[1] = 1

ref_point = torch.tensor([18, 0.1, 0.01], dtype=dtype)

def optimize_qehvi_and_get_observation(model, train_x, train_obj, sampler):
    """Optimizes the qEHVI acquisition function, and returns a new candidate and observation."""
    # partition non-dominated space into disjoint rectangles
    with torch.no_grad():
        pred = model.posterior(normalize(train_x, bounds)).mean
    
    partitioning = FastNondominatedPartitioning(
        ref_point= ref_point,
        Y=pred,
    )

    acq_func = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point,
        partitioning=partitioning,
        sampler=sampler,
        constraints=[constraint_func],
    )

    # optimize
    candidates, acq_vals = optimize_acqf(
        acq_function=acq_func,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for intialization heuristic
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )
    
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=bounds)

    return new_x, acq_vals

Building the BO framework

In [16]:
hvs_qehvi = []

# fit regression models
fit_gpytorch_mll(mll)

#fit classification model
cls_model, cls_likl = train_cls_gp(cls_model, cls_likl, train_x, training_iter=50)

# define acquisition modules
sampler = SobolQMCNormalSampler(sample_shape=torch.Size([128]))



In [21]:
new_x_qehvi, new_acq_vals = optimize_qehvi_and_get_observation(model, train_x, train_y_list, sampler)

RuntimeError: Sizes of tensors must match except in dimension 2. Expected size 8 but got size 3 for tensor number 1 in the list.

need to change the optimize_qehvi function?

Add constraint function to qEHVI definition

# **Testing**

In [22]:
# following the botorch tutorial
BATCH_SIZE = 4      # Number of candidates selected in each BO run/iteration
NUM_RESTARTS = 10   # Restarts during BO run
RAW_SAMPLES = 512   

# function to optimize acquisition function
standard_bounds = torch.zeros(2, 8)
standard_bounds[1] = 1

ref_point = torch.tensor([18, 0.1, 0.01], dtype=dtype)

In [23]:
# partition non-dominated space into disjoint rectangles
with torch.no_grad():
    pred = model.posterior(normalize(train_x, bounds)).mean

In [26]:
partitioning = FastNondominatedPartitioning(
    ref_point= ref_point,
    Y=pred,
)

In [28]:
acq_func = qExpectedHypervolumeImprovement(
    model=model,
    ref_point=ref_point,
    partitioning=partitioning,
    sampler=sampler,
    constraints=[constraint_func],
)

In [48]:
def constraint_func(X):
    #calculate probability of class 1 for X
    cls_likl.eval()
    cls_model.eval()
    with torch.no_grad():
        logit_dist = cls_model(X)
    samples = logit_dist.sample(torch.Size((256,))).exp()
    class_probs = (samples / samples.sum(-2, keepdim=True)).mean(0)
    feasible_class_probs = class_probs[1]

    threshold = 0.7
    feasibility_condition = threshold - feasible_class_probs    #negative value implies feasibility
    
    return feasibility_condition.view(-1,1)


from botorch.optim.initializers import gen_batch_initial_conditions
Xinit = gen_batch_initial_conditions(acq_func, bounds, q=BATCH_SIZE, num_restarts=NUM_RESTARTS, raw_samples=RAW_SAMPLES)

RuntimeError: Sizes of tensors must match except in dimension 2. Expected size 8 but got size 3 for tensor number 1 in the list.

In [None]:

# optimize
candidates, acq_vals = optimize_acqf(
    acq_function=acq_func,
    bounds=standard_bounds,
    q=BATCH_SIZE,
    num_restarts=NUM_RESTARTS,
    raw_samples=RAW_SAMPLES,  # used for intialization heuristic
    options={"batch_limit": 5, "maxiter": 200},
    sequential=True,
)

# observe new values
new_x = unnormalize(candidates.detach(), bounds=bounds)


In [42]:
def constraint_func(X):
    s, B, q, d = X.shape  # Extract the dimensions
    cls_likl.eval()
    cls_model.eval()
    with torch.no_grad():
        # Reshape to (s * B * q, d) to pass through the model
        X_flat = X.view(-1, d)
        logit_dist = cls_model(X_flat)
        
        # Sample and compute class probabilities
        samples = logit_dist.sample(torch.Size((256,))).exp()
        class_probs = (samples / samples.sum(-2, keepdim=True)).mean(0)
        feasible_class_probs = class_probs[1]
        
        threshold = 0.7
        feasibility_condition = threshold - feasible_class_probs  # Negative value implies feasibility
        
        # Reshape the feasibility condition to (s, B, q, 1)
        feasibility_condition = feasibility_condition.view(s, B, q, 1)
    
    return feasibility_condition

constraint_func(train_x)

ValueError: not enough values to unpack (expected 4, got 2)