In [3]:
import math
from botorch.utils import t_batch_mode_transform
import torch
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from botorch.utils import standardize
from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
from botorch.acquisition import AnalyticAcquisitionFunction
from botorch.acquisition.monte_carlo import MCAcquisitionFunction
from botorch.acquisition.monte_carlo import AcquisitionFunction
from botorch.optim.optimize import optimize_acqf
from botorch.optim.initializers import gen_batch_initial_conditions
from botorch.utils.transforms import normalize, unnormalize
from botorch.models.transforms.outcome import Standardize

#kernels
from gpytorch.kernels import RBFKernel

import sys
import os

sys.path.append(os.path.join(os.getcwd(), '..', 'toolkits'))

from metrics import HV, violation, cum_violation, cum_regret


# Problem setting: Disc Brake Design

In [4]:
from design import Design
from botorch.utils.sampling import draw_sobol_samples

# Instantiate the benchmark problem from the local `design` module (the
# "Disc Brake Design" problem, per the section heading above). It is shared
# by `test_f` and `bounds` below.
problem = Design()


def test_f(X):
    """Evaluate the module-level `problem` at `X`.

    `X` is cast to float64 before being handed to `problem.evaluate`; whatever
    that call returns is passed back unchanged (callers in this notebook unpack
    it as an `(objectives, constraints)` pair).
    """
    X_double = X.to(torch.float64)
    return problem.evaluate(X_double)


# Search-space bounds of the problem, in original (un-normalized) units.
# Used by `draw_sobol_samples` and by `normalize` / `unnormalize` further down.
# presumably a 2 x d tensor of (lower, upper) bounds — TODO confirm in `design.py`.
bounds = problem.bounds


def generate_initial_data(n):
    """Draw `n` Sobol points inside `bounds` and evaluate the problem there.

    Returns:
        A tuple `(train_x, train_obj, train_const)`: the sampled inputs as a
        float64 tensor with the q-dimension squeezed out, followed by the two
        values returned by `test_f` (objectives, then constraints).
    """
    sobol_points = draw_sobol_samples(bounds=bounds, n=n, q=1)  # q=1 -> middle dim of size 1
    train_x = sobol_points.squeeze(1).to(torch.float64)
    train_obj, train_const = test_f(train_x)
    return train_x, train_obj, train_const

# Acquisition

In [6]:
from botorch.acquisition import AnalyticAcquisitionFunction
import torch


class HyperVolumeScalarizedUCB(AnalyticAcquisitionFunction):
    """Hypervolume-scalarized upper confidence bound for multi-output models.

    For a single candidate point the value is

        min_i  max(0, (mean_i + beta * std_i - ref_i) / theta_i) ** o

    where `o` is the number of model outputs.
    """

    def __init__(
        self,
        model,
        beta: float,
        theta: torch.Tensor,
        ref: torch.Tensor,
        maximize: bool = True,
    ) -> None:
        """
        Initializes the HyperVolume Scalarized Upper Confidence Bound Acquisition Function.

        Args:
            model: A BoTorch model representing the posterior distribution of the objectives.
            beta (Tensor of shape [1] or [o]): The exploration-exploitation trade-off parameter(s).
            theta (Tensor of shape [o]): The weights used for scalarizing the upper bounds, where `o` is the number of objectives.
            ref (Tensor of shape [o]): Reference point subtracted from the upper confidence bounds before scalarization.
            maximize (bool): Stored on the instance but not read by `forward`. Defaults to True.
        """
        # Resolve to the class *after* AnalyticAcquisitionFunction in the MRO, i.e. its
        # constructor is skipped on purpose.
        # NOTE(review): presumably done to bypass that constructor's checks on
        # multi-output models — confirm against the installed BoTorch version.
        super(AnalyticAcquisitionFunction, self).__init__(model)
        self.maximize = maximize
        self.register_buffer("beta", torch.as_tensor(beta))
        self.register_buffer("theta", torch.as_tensor(theta))
        self.register_buffer("ref", torch.as_tensor(ref))

    @t_batch_mode_transform(expected_q=1)
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Evaluate the scalarized Upper Confidence Bound on the candidate set X.

        Args:
            X (Tensor of shape [..., 1, d]): Batches of single `d`-dimensional design points
                (the decorator enforces q = 1).

        Returns:
            Tensor of shape [...]: The scalarized Upper Confidence Bound value for each batch element.
        """
        # Move the buffers to X's dtype/device so the arithmetic below does not mix types.
        self.beta = self.beta.to(X)
        self.theta = self.theta.to(X)
        self.ref = self.ref.to(X)

        posterior = self.model.posterior(X)
        means = posterior.mean.squeeze(dim=-2)  # ... x o
        std_devs = posterior.variance.squeeze(dim=-2).sqrt()  # ... x o

        # Number of outputs is the LAST axis; indexing axis 1 would pick up a batch
        # dimension whenever X carries more than one batch dimension.
        num_outputs = means.shape[-1]

        # Upper confidence bound of each output, measured relative to the reference point.
        u_t = means + (self.beta.expand_as(means) * std_devs) - self.ref  # ... x o

        # Hypervolume scalarization: clip at zero, raise to the o-th power, take the
        # worst (smallest) output.
        clipped = torch.max(torch.zeros_like(u_t), u_t / self.theta)
        scalarized_ut = (clipped ** num_outputs).min(dim=-1).values  # ...

        return scalarized_ut

# Auxiliary acquisition function

In [7]:
class AuxiliaryAcq(MCAcquisitionFunction):
    """Auxiliary acquisition (Algo. 2): worst-case UCB slack over a q-batch."""

    def __init__(
        self,
        model,
        beta: float,
        theta: torch.Tensor,
        ref: torch.Tensor,
        maximize: bool = True,
    ) -> None:
        """
        Set up the auxiliary acquisition defined in Algo.2.

        Args:
            model: A BoTorch model whose posterior supplies mean and variance per output.
            beta (Tensor of shape [1] or [o]): Multiplier on the posterior standard deviation.
            theta (Tensor of shape [o]): Stored as a buffer; `forward` does not read it.
            ref (Tensor of shape [o]): Per-output level subtracted from the upper confidence bound.
            maximize (bool): Stored on the instance; `forward` does not read it. Defaults to True.
        """
        # Starts the constructor lookup after MCAcquisitionFunction in the MRO,
        # so MCAcquisitionFunction.__init__ itself is not run.
        super(MCAcquisitionFunction, self).__init__(model)
        self.maximize = maximize
        for buffer_name, value in (("beta", beta), ("theta", theta), ("ref", ref)):
            self.register_buffer(buffer_name, torch.as_tensor(value))

    @t_batch_mode_transform()
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Return, per t-batch, the smallest `UCB - ref` over all q points and all outputs.

        Args:
            X (Tensor of shape [b, q, d]): `b` batches of `q` design points of dimension `d`.

        Returns:
            Tensor of shape [b]: min over q and over outputs of
            `mean + beta * std - ref`.
        """
        # Align the buffers with X's dtype/device before doing arithmetic.
        for buffer_name in ("beta", "theta", "ref"):
            setattr(self, buffer_name, getattr(self, buffer_name).to(X))

        post = self.model.posterior(X)
        mu = post.mean  # b x q x o
        sigma = post.variance.sqrt()  # b x q x o

        # Optimistic slack of every output at every point.
        slack = mu + (self.beta.expand_as(mu) * sigma) - self.ref  # b x q x o

        # Reduce over outputs first, then over the q points.
        worst_per_point = slack.min(dim=-1).values  # b x q
        return worst_per_point.min(dim=-1).values  # b

# Constraints

In [8]:
import torch
from typing import List, Tuple, Callable


def create_ucb_constraints(model, beta: float, thresholds: torch.Tensor):
    """
    Build one UCB-based non-linear inequality constraint per model output.

    Constraint `i` evaluates `mean_i(x) + beta * std_i(x) - thresholds[i]` from the
    model posterior and is satisfied when that value is non-negative.

    Args:
        model: A multi-output model exposing `posterior(X)` with `.mean` and `.variance`
            whose last dimension indexes the outputs.
        beta (float): The scalar coefficient on the posterior standard deviation.
        thresholds (torch.Tensor): One threshold per output (1-D, length = number of outputs).

    Returns:
        List[Tuple[Callable, bool]]: One `(callable, True)` tuple per output. The flag
        `True` marks the constraint as intra-point, so the callable is expected to be
        called with a single design point of shape `[d]`; it then returns a one-element
        tensor. If it is called with `[q, d]` instead, it returns shape `[1, q]`
        (one value per point).
    """
    thresholds = torch.as_tensor(thresholds)

    def constraint(X):
        """
        Evaluate the UCB slack of every output at `X`.

        Args:
            X (torch.Tensor): A design point of shape `[d]` (or `[q, d]`).

        Returns:
            torch.Tensor: `ucb - thresholds` with a leading dimension of size 1 added,
            i.e. shape `[1, m]` for a `[d]` input (`m` = number of outputs).
        """
        # Add a leading dimension before querying the posterior.
        X = X.unsqueeze(0)
        posterior = model.posterior(X)
        ucb = posterior.mean + beta * posterior.variance.sqrt()

        # Positive where the optimistic estimate clears the threshold.
        return ucb - thresholds

    # Select output `i` on the LAST axis so the right column is picked regardless of
    # how many leading dimensions the input carries. `i=i` binds the loop variable at
    # definition time (avoids the late-binding closure pitfall).
    constraints = [
        (lambda X, i=i: constraint(X)[..., i], True) for i in range(thresholds.size(0))
    ]

    return constraints

In [9]:
def get_random_sample_on_n_sphere(N, R):
    """Draw one point uniformly from the surface of the (N-1)-sphere of radius R.

    Uses the Gaussian-normalization method: a standard normal vector has a
    rotation-invariant distribution, so rescaling it to a fixed length gives a
    uniform direction.
    See https://mathworld.wolfram.com/HyperspherePointPicking.html

    Args:
        N: Dimension of the returned vector.
        R: Radius of the sphere.

    Returns:
        A 1-D tensor of length N with Euclidean norm R.
    """
    gaussian = torch.randn(N)
    scaled = R * gaussian
    return scaled / torch.norm(gaussian)

# BO loop

The objective thresholds, which also serve as the reference point for the hypervolume, are set to (-8, -8).

## Kernel picking

In [10]:
# from Customized_Kernels import Customized_RBF, Customized_Matern
from metrics import HV, violation

# rbf_module = Customized_RBF(2,2)
# matern_module = Customized_Matern(1,0.5,1)

In [None]:
import warnings
import time
import math
import torch
from botorch.models import SingleTaskGP, ModelListGP
from botorch.optim import optimize_acqf
from botorch.fit import fit_gpytorch_mll
# `Standardize` is an outcome transform and lives under `botorch.models.transforms`;
# there is no top-level `botorch.transforms` module.
from botorch.models.transforms.outcome import Standardize
from botorch.utils.transforms import normalize, unnormalize
from gpytorch.mlls import SumMarginalLogLikelihood
from metrics import HV

# Suppress warnings
warnings.filterwarnings("ignore")

# Counter for runs that finished all iterations without an early stop
c = 0
print("0" * 50)

# Define random seeds for reproducibility
random_seeds = [
    83810, 14592, 3278, 97196, 36048, 32098, 29256, 18289, 96530, 13434,
    88696, 97080, 71482, 11395, 77397, 55302, 4165, 3905, 12280, 28657,
    30495, 66237, 78907, 3478, 73563, 26062, 93850, 85181, 91924, 71426,
    54987, 28893, 58878, 77236, 36463, 851, 99458, 20926, 91506, 55392,
    44597, 36421, 20379, 28221, 44118, 13396, 12156, 49797, 12676, 47052,
]

# Experiment configuration (identical for every seed and every iteration)
NUM_ITER = 100  # Number of BO iterations per seed
m = 2           # Number of objectives
m_c = 4         # Number of constraints

# Objective thresholds; also used as the reference point for the hypervolume
a = -8
b = -8
thresholds = torch.tensor([a, b], dtype=torch.float64)

# Flag set when a run is stopped early
declared = False

# Loop through a subset of random seeds
for seed in random_seeds[:10]:
    # Set seed for reproducibility
    torch.manual_seed(seed)

    # Generate initial data (inputs, objectives, constraints); inputs are
    # mapped to the unit cube for the GP models
    train_X, train_Y, train_Con = generate_initial_data(10)
    train_X = normalize(train_X, bounds)

    # The random baseline starts from the same initial design. Plain aliases are
    # enough: the tensors are only ever extended with torch.cat, which allocates
    # new tensors instead of mutating these.
    train_X_r, train_Y_r, train_Con_r = train_X, train_Y, train_Con

    # Unit-cube optimizer bounds and zero constraint thresholds, created in the
    # dtype/device of the training inputs so the optimizer does not feed
    # float32 candidates to GPs trained on float64 data.
    dim = train_X.shape[-1]
    unit_bounds = torch.tensor(
        [[0.0] * dim, [1.0] * dim], dtype=train_X.dtype, device=train_X.device
    )
    const_thresholds = torch.zeros(m_c, dtype=train_X.dtype, device=train_X.device)

    # Initialize metrics tracking
    Hpv = []    # Hypervolume for CMOBO
    Hpvr = []   # Hypervolume for random sampling

    # Main Bayesian optimization loop
    for batch in range(NUM_ITER):
        t0 = time.monotonic()  # recorded but not used further in this cell

        # Build GP models for objectives and constraints
        model_list = []
        model_list_const = []

        # Objective models: standardized outcomes, fixed observation noise 0.05**2
        for i in range(m):
            current_model = SingleTaskGP(
                train_X=train_X,
                train_Y=train_Y[:, i].unsqueeze(-1),
                outcome_transform=Standardize(m=1),
                train_Yvar=torch.full(
                    (train_X.shape[0], 1),
                    0.05**2,
                    dtype=train_X.dtype,
                    device=train_X.device,
                ),
            )
            model_list.append(current_model)

        # Constraint models
        for i in range(m_c):
            current_model_const = SingleTaskGP(
                train_X=train_X,
                train_Y=train_Con[:, i].unsqueeze(-1),
            )
            model_list_const.append(current_model_const)

        # Combine models
        model = ModelListGP(*model_list)
        model_const = ModelListGP(*model_list_const)

        # Create and fit marginal log likelihoods.
        # The constraint MLL must wrap `model_const`; pairing the constraint
        # likelihoods with the objective `model` would leave the constraint GPs
        # unfitted and refit the objective GPs with the wrong likelihoods.
        mll_model = SumMarginalLogLikelihood(model.likelihood, model)
        mll_const = SumMarginalLogLikelihood(model_const.likelihood, model_const)
        fit_gpytorch_mll(mll_model)
        fit_gpytorch_mll(mll_const)

        # Sample scalarization weights: a random direction in the positive orthant
        theta = get_random_sample_on_n_sphere(m, 1).abs()

        # Calculate beta parameters (exploration-exploitation trade-off)
        beta = 0.4 * math.log(4 * ((batch + 1)))
        beta_const = 0.2 * math.log(4 * (batch + 1))

        # Initialize auxiliary acquisition function for constraints
        AuxAcq = AuxiliaryAcq(
            model=model_const,
            beta=torch.tensor(beta_const),
            theta=theta,
            ref=const_thresholds,  # Constraint thresholds
        )

        # Optimize auxiliary acquisition function; its optimizer is reused below
        # as the starting points for the constrained optimization
        initializer, acq_value = optimize_acqf(
            acq_function=AuxAcq,
            q=3,
            num_restarts=10,
            raw_samples=10,
            bounds=unit_bounds,
        )

        # Early stop: a negative optimum means no batch was found whose UCB
        # constraint slacks are all non-negative
        if acq_value < 0:
            declared = True
            break

        # Create hypervolume acquisition function
        HVUCB = HyperVolumeScalarizedUCB(
            model=model,
            beta=torch.tensor(beta),
            theta=theta,
            ref=thresholds,
        )

        # Optimize acquisition function subject to the UCB constraints,
        # starting from each of the auxiliary-optimal points (one per restart)
        candidate, acq_scalar = optimize_acqf(
            acq_function=HVUCB,
            q=1,
            num_restarts=4,
            nonlinear_inequality_constraints=create_ucb_constraints(
                beta=beta_const,
                model=model_const,
                thresholds=const_thresholds,  # Constraint thresholds
            ),
            batch_initial_conditions=initializer.view([-1, 1, dim]),
            bounds=unit_bounds,
            options={"batch_limit": 1, "maxiter": 500},
        )

        # Update data with new observation (problem is evaluated in original units)
        train_X = torch.cat([train_X, candidate], dim=0)
        new_y, new_con = test_f(unnormalize(candidate, bounds=bounds))
        train_Y = torch.cat([train_Y, new_y], dim=0)
        train_Con = torch.cat([train_Con, new_con], dim=0)

        # Total violation per point: sum of the magnitudes of negative constraint values
        vio = torch.where(train_Con < 0, -train_Con, torch.zeros_like(train_Con)).sum(dim=-1)
        feasi_ind = vio == 0  # Identify feasible points

        # Calculate hypervolume (only for feasible points)
        hv = HV(Y=train_Y[feasi_ind, :], ref=thresholds)
        Hpv.append(hv)

        # Random sampling baseline: `generate_initial_data` already returns
        # (inputs, objectives, constraints) for the sampled point, so unpack in
        # that order and do not evaluate the problem a second time.
        new_x_r, new_y_r, new_con_r = generate_initial_data(1)
        new_x_r = normalize(new_x_r, bounds)

        # Update random baseline data
        train_Y_r = torch.cat([train_Y_r, new_y_r], dim=0)
        train_Con_r = torch.cat([train_Con_r, new_con_r], dim=0)

        # Calculate constraint violations for random points
        vio_r = torch.where(train_Con_r < 0, -train_Con_r, torch.zeros_like(train_Con_r)).sum(dim=-1)
        feasi_ind_r = vio_r == 0

        # Calculate hypervolume for random sampling (only for feasible points)
        hv_r = HV(Y=train_Y_r[feasi_ind_r, :], ref=thresholds)
        Hpvr.append(hv_r)

        # Print progress
        print(f"round{batch+1}", "CMOBO: ", hv, "random: ", hv_r)

    # Save results if optimization completed successfully
    if not declared:
        c += 1
        # Save hypervolume progression and constraint violations
        # torch.save(torch.tensor(Hpv), f"hv_design_cmobo_0.4_{c}.pt")
        # torch.save(torch.tensor(vio), f"vio_design_cmobo_0.4_{c}.pt")
        # torch.save(torch.tensor(Hpvr), f"hv_design_rand_{c}.pt")
        # torch.save(torch.tensor(vio_r), f"vio_design_rand_{c}.pt")
        print("o", end="")  # Indicate successful run
    else:
        print("*", end="")  # Indicate early stopping

    # Reset flag for next seed
    declared = False