In [1]:
import os
import torch


# Tensor factory keyword arguments shared by the whole notebook:
# double precision, on the GPU when one is available and on the CPU otherwise.
tkwargs = dict(
    dtype=torch.double,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)
# Truthy when the SMOKE_TEST environment variable is set; later cells use it
# to shrink the optimisation budget.
SMOKE_TEST = os.environ.get("SMOKE_TEST")

### Problem setup


In [2]:
from botorch.test_functions.multi_objective import Penicillin

# Penicillin test problem with negated objectives, moved to the shared
# dtype/device.
problem = Penicillin(negate=True).to(**tkwargs)
# Override the search-space bounds (row 0 = lower, row 1 = upper, one column
# per input). They are created with ``tkwargs`` so they live on the same
# device as ``problem``, ``NOISE_SE`` and ``standard_bounds``; building them
# with only ``dtype=torch.float64`` left them on the CPU on CUDA machines.
problem.bounds = torch.tensor(
    [[60, 10, 293, 10, 0.01, 600, 5], [120, 18, 303, 18, 0.1, 700, 6.5]],
    **tkwargs,
)
# NOTE(review): the acquisition functions below read ``problem.ref_point``
# unchanged, while the experiment loop reports hypervolume against
# [10, -60, -350]. Uncomment to use the same point for both — confirm intent.
# problem.ref_point = torch.tensor([10, -60, -350]).to(**tkwargs)
d = 7  # number of inputs (matches the 7 bound columns above)
M = 3  # number of objectives

In [3]:
from botorch.models.gp_regression import SingleTaskGP
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.transforms.outcome import Standardize
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.transforms import normalize, unnormalize
from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
from gpytorch.kernels import RBFKernel, ScaleKernel


def evaluate_slack(X, ref=torch.tensor([10, -60, -350]).to(**tkwargs)):
    """Return the summed signed gap between the objectives at ``X`` and ``ref``.

    Args:
        X: Design points, passed straight to ``problem``.
        ref: Reference values subtracted from the objectives. The default
            tensor is built once, when this function is defined.

    Returns:
        ``problem(X) - ref`` summed over the last dimension, with that
        dimension kept (size 1).
    """
    slack = problem(X) - ref
    return slack.sum(dim=-1, keepdim=True)


# Per-objective standard deviation of the observation noise.
# Built directly in the target dtype: going through a default float32 tensor
# first and then casting stored 0.1 as 0.10000000149..., not the double 0.1.
NOISE_SE = torch.full((M,), 0.1, **tkwargs)


def generate_initial_data(n):
    """Draw ``n`` Sobol designs inside ``problem.bounds`` and evaluate them.

    Args:
        n: Number of initial designs to draw.

    Returns:
        A tuple ``(train_x, train_obj, train_obj_true)``: the designs, the
        objectives with Gaussian noise scaled per objective by ``NOISE_SE``,
        and the noise-free objectives.
    """
    sobol_draws = draw_sobol_samples(bounds=problem.bounds, n=n, q=1)
    train_x = sobol_draws.squeeze(1)  # drop the q=1 dimension
    train_obj_true = problem(train_x)
    noise = NOISE_SE * torch.randn_like(train_obj_true)
    train_obj = train_obj_true + noise
    return train_x, train_obj, train_obj_true


# Scaled RBF kernel (default constructor arguments) that ``initialize_model``
# uses as the covariance module of each objective's GP.
base = RBFKernel()
covar_module = ScaleKernel(base_kernel=base)


def initialize_model(train_x, train_obj):
    """Build one fixed-noise ``SingleTaskGP`` per objective in a ``ModelListGP``.

    Args:
        train_x: Designs in the original input space; they are normalized
            with ``problem.bounds`` before being handed to the GPs.
        train_obj: Observed objectives; the last dimension indexes objectives.

    Returns:
        A tuple ``(mll, model)``: the ``SumMarginalLogLikelihood`` to pass to
        the fitting routine and the (not yet fitted) ``ModelListGP``.
    """
    import copy

    train_x = normalize(train_x, problem.bounds)
    models = []
    for i in range(train_obj.shape[-1]):
        train_y = train_obj[..., i : i + 1]
        # Known noise variance of objective i, created with the dtype and
        # device of ``train_y``. This tensor used to be computed but never
        # used; a default-dtype CPU tensor of 0.01 was passed instead.
        train_yvar = torch.full_like(train_y, NOISE_SE[i] ** 2)
        models.append(
            SingleTaskGP(
                train_x,
                train_y,
                train_Yvar=train_yvar,
                outcome_transform=Standardize(m=1),
                # Each GP gets its own copy of the kernel. Passing the single
                # module-level instance tied the kernel hyperparameters of all
                # objectives together and carried fitted values over into
                # every later call of this function.
                covar_module=copy.deepcopy(covar_module),
            )
        )
    model = ModelListGP(*models)
    mll = SumMarginalLogLikelihood(model.likelihood, model)
    return mll, model

In [4]:
from botorch.optim.optimize import optimize_acqf, optimize_acqf_list
from botorch.acquisition.objective import GenericMCObjective
from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization
from botorch.utils.multi_objective.box_decompositions.non_dominated import (
    FastNondominatedPartitioning,
)
from botorch.acquisition.multi_objective.monte_carlo import (
    qExpectedHypervolumeImprovement,
    qNoisyExpectedHypervolumeImprovement,
)
from botorch.utils.sampling import sample_simplex


# Acquisition-optimisation budget; the smaller values apply when SMOKE_TEST is set.
BATCH_SIZE = 1
NUM_RESTARTS = 2 if SMOKE_TEST else 10
RAW_SAMPLES = 4 if SMOKE_TEST else 512

# Unit-cube bounds for the normalized inputs (row 0 = lower, row 1 = upper).
standard_bounds = torch.stack(
    [torch.zeros(problem.dim, **tkwargs), torch.ones(problem.dim, **tkwargs)]
)


def optimize_qehvi_and_get_observation(model, train_x, train_obj, sampler):
    """Choose the next design with qEHVI and evaluate it on ``problem``.

    Args:
        model: Model whose posterior drives the acquisition function.
        train_x: Designs observed so far, in the original input space.
        train_obj: Accepted for a uniform signature; not used here.
        sampler: MC sampler handed to the acquisition function.

    Returns:
        A tuple ``(new_x, new_obj, new_obj_true)``: the new design(s) in the
        original input space, their noisy objectives and their noise-free
        objectives.
    """
    # The box decomposition is built from the posterior mean at the observed
    # designs, using the problem's own reference point.
    with torch.no_grad():
        posterior_mean = model.posterior(normalize(train_x, problem.bounds)).mean
    box_decomposition = FastNondominatedPartitioning(
        ref_point=problem.ref_point,
        Y=posterior_mean,
    )
    qehvi = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=problem.ref_point,
        partitioning=box_decomposition,
        sampler=sampler,
    )
    # Optimise over the unit cube; raw_samples feeds the initialization heuristic.
    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf(
        acq_function=qehvi,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,
        options=optimizer_options,
        sequential=True,
    )
    # Map back to the original space and observe with simulated noise.
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj_true = problem(new_x)
    noise = NOISE_SE * torch.randn_like(new_obj_true)
    new_obj = new_obj_true + noise
    return new_x, new_obj, new_obj_true

In [5]:
def optimize_qnehvi_and_get_observation(model, train_x, train_obj, sampler):
    """Choose the next design with qNEHVI and evaluate it on ``problem``.

    Args:
        model: Model whose posterior drives the acquisition function.
        train_x: Designs observed so far, in the original input space; their
            normalized version is used as the acquisition baseline.
        train_obj: Accepted for a uniform signature; not used here.
        sampler: MC sampler handed to the acquisition function.

    Returns:
        A tuple ``(new_x, new_obj, new_obj_true)``: the new design(s) in the
        original input space, their noisy objectives and their noise-free
        objectives.
    """
    # NOTE(review): the reference point here is ``problem.ref_point``, whereas
    # the experiment loop reports hypervolume against [10, -60, -350] — confirm
    # that the difference is intended.
    baseline_x = normalize(train_x, problem.bounds)
    qnehvi = qNoisyExpectedHypervolumeImprovement(
        model=model,
        ref_point=problem.ref_point.tolist(),
        X_baseline=baseline_x,
        prune_baseline=True,
        sampler=sampler,
    )
    # Optimise over the unit cube; raw_samples feeds the initialization heuristic.
    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf(
        acq_function=qnehvi,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,
        options=optimizer_options,
        sequential=True,
    )
    # Map back to the original space and observe with simulated noise.
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj_true = problem(new_x)
    noise = NOISE_SE * torch.randn_like(new_obj_true)
    new_obj = new_obj_true + noise
    return new_x, new_obj, new_obj_true

In [6]:
from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement


def optimize_qnparego_and_get_observation(model, train_x, train_obj, sampler):
    """Choose the next design(s) with qNParEGO and evaluate them on ``problem``.

    One acquisition function is built per batch slot, each with its own
    randomly drawn weight vector for a Chebyshev scalarization of the
    objectives; the list is then optimised with ``optimize_acqf_list``.

    Args:
        model: Model whose posterior drives the acquisition functions.
        train_x: Designs observed so far, in the original input space.
        train_obj: Accepted for a uniform signature; not used here.
        sampler: MC sampler handed to every acquisition function.

    Returns:
        A tuple ``(new_x, new_obj, new_obj_true)``: the new design(s) in the
        original input space, their noisy objectives and their noise-free
        objectives.
    """
    x_unit = normalize(train_x, problem.bounds)
    with torch.no_grad():
        pred = model.posterior(x_unit).mean

    def _scalarized_acqf():
        # Fresh random weights for every batch slot.
        weights = sample_simplex(problem.num_objectives, **tkwargs).squeeze()
        scalarization = get_chebyshev_scalarization(weights=weights, Y=pred)
        return qNoisyExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=GenericMCObjective(scalarization),
            X_baseline=x_unit,
            sampler=sampler,
            prune_baseline=True,
        )

    acq_func_list = [_scalarized_acqf() for _ in range(BATCH_SIZE)]

    # Optimise over the unit cube; raw_samples feeds the initialization heuristic.
    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf_list(
        acq_function_list=acq_func_list,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,
        options=optimizer_options,
    )
    # Map back to the original space and observe with simulated noise.
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj_true = problem(new_x)
    noise = NOISE_SE * torch.randn_like(new_obj_true)
    new_obj = new_obj_true + noise
    return new_x, new_obj, new_obj_true

In [7]:
def voxel_grid_sampling_with_indices(points, voxel_size=5.0):
    """Thin a point set by keeping the first point that falls into each voxel.

    The points are shifted so that their per-dimension minimum sits at the
    origin, then binned into an axis-aligned grid of cells with edge length
    ``voxel_size``.

    Args:
        points: 2-D tensor with one point per row.
        voxel_size: Edge length of a grid cell, applied to every dimension.

    Returns:
        1-D ``torch.long`` tensor with the row index of the first point seen
        in each occupied voxel, in ascending order. Empty when ``points`` has
        no rows.
    """
    # A reduction over a zero-length dimension raises, so handle "no points"
    # explicitly and keep the index dtype.
    if points.shape[0] == 0:
        return torch.empty(0, dtype=torch.long)

    origin = torch.min(points, dim=0).values
    voxel_indices = torch.floor((points - origin) / voxel_size).long()

    # Dicts preserve insertion order, so the values end up sorted by row index.
    first_row_in_voxel = {}
    for row, voxel in enumerate(voxel_indices.tolist()):
        first_row_in_voxel.setdefault(tuple(voxel), row)

    return torch.tensor(list(first_row_in_voxel.values()), dtype=torch.long)

In [None]:
import time
import warnings
import torch
from botorch import fit_gpytorch_mll
from botorch.exceptions import BadInitialCandidatesWarning
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.utils.multi_objective.box_decompositions.dominated import (
    DominatedPartitioning,
)
from botorch.utils.multi_objective.pareto import is_non_dominated

# Suppress warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including numerical warnings from model fitting.
warnings.filterwarnings("ignore")

# Define random seeds for reproducibility
random_seeds = [
    83810, 14592, 3278, 97196, 36048, 32098, 29256, 18289, 96530, 13434,
    88696, 97080, 71482, 11395, 77397, 55302, 4165, 3905, 12280, 28657,
    30495, 66237, 78907, 3478, 73563, 26062, 93850, 85181, 91924, 71426,
    54987, 28893, 58878, 77236, 36463, 851, 99458, 20926, 91506, 55392,
    44597, 36421, 20379, 28221, 44118, 13396, 12156, 49797, 12676, 47052,
]

# Initialize variables
declared = False  # Flag for early stopping (not read anywhere in this cell)
N_BATCH = 70      # Number of BO iterations
MC_SAMPLES = 128 if not SMOKE_TEST else 16  # Number of MC samples for acquisition
verbose = True    # Whether to print detailed progress
c = 0             # Counter for completed runs; used in the output file names

# Reference point used for *reporting* hypervolume, built once instead of in
# every iteration.
# NOTE(review): the acquisition function uses ``problem.ref_point`` instead —
# confirm that the difference is intended.
HV_REF_POINT = torch.tensor([10, -60, -350]).to(**tkwargs)

# Loop through a subset of random seeds
for seed in random_seeds[:10]:
    # Set seed for reproducibility
    torch.manual_seed(seed)

    # Generate initial data
    train_x_qnehvi, train_obj_qnehvi, train_obj_true_qnehvi = generate_initial_data(64)

    # Thin the initial design: keep one point per voxel of the (noisy)
    # objective space. All three tensors are indexed with the same rows so
    # they stay aligned; the noise-free objectives used to keep all 64 rows,
    # which inflated the initial hypervolume with points the model never saw
    # and left the saved tensors with different lengths.
    resample_ind = voxel_grid_sampling_with_indices(train_obj_qnehvi)
    train_x_qnehvi = train_x_qnehvi[resample_ind, :]
    train_obj_qnehvi = train_obj_qnehvi[resample_ind, :]
    train_obj_true_qnehvi = train_obj_true_qnehvi[resample_ind, :]

    # Initialize hypervolume tracking
    hvs_qnehvi = []

    # Initialize model for qNEHVI
    mll_qnehvi, model_qnehvi = initialize_model(train_x_qnehvi, train_obj_qnehvi)

    # Compute and store the initial hypervolume of the noise-free objectives
    bd = DominatedPartitioning(ref_point=HV_REF_POINT, Y=train_obj_true_qnehvi)
    volume = bd.compute_hypervolume().item()
    hvs_qnehvi.append(volume)

    # Run N_BATCH rounds of Bayesian optimization after the initial batch
    for iteration in range(1, N_BATCH + 1):
        t0 = time.monotonic()  # Track iteration time

        # Fit the GP model
        fit_gpytorch_mll(mll_qnehvi)

        # Define the QMC sampler for qNEHVI
        qnehvi_sampler = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))

        # Optimize qNEHVI acquisition function and get new observation
        (
            new_x_qnehvi,
            new_obj_qnehvi,
            new_obj_true_qnehvi,
        ) = optimize_qnehvi_and_get_observation(
            model_qnehvi, train_x_qnehvi, train_obj_qnehvi, qnehvi_sampler
        )

        # Append the new observation to the training data
        train_x_qnehvi = torch.cat([train_x_qnehvi, new_x_qnehvi])
        train_obj_qnehvi = torch.cat([train_obj_qnehvi, new_obj_qnehvi])
        train_obj_true_qnehvi = torch.cat([train_obj_true_qnehvi, new_obj_true_qnehvi])

        # Compute updated hypervolume
        bd = DominatedPartitioning(ref_point=HV_REF_POINT, Y=train_obj_true_qnehvi)
        volume = bd.compute_hypervolume().item()
        hvs_qnehvi.append(volume)

        # Rebuild the model on the enlarged data set; it is fitted at the top
        # of the next iteration.
        mll_qnehvi, model_qnehvi = initialize_model(train_x_qnehvi, train_obj_qnehvi)

        # Calculate and report iteration time
        t1 = time.monotonic()
        if verbose:
            print(
                f"({hvs_qnehvi[-1]:>4.2f}), " f"time = {t1-t0:>4.2f}.",
                end="",
            )
        else:
            print(".", end="")

    # Save results for this run
    c += 1
    torch.save(hvs_qnehvi, f"peni_unconstrained_hv_{c}.pt")
    torch.save(train_obj_true_qnehvi, f"peni_unconstrained_obj_true_{c}.pt")
    torch.save(train_obj_qnehvi, f"peni_unconstrained_obj_{c}.pt")