In [4]:
import copy
import os
import sys

import torch

# Tensor-construction defaults shared by the notebook (passed around as **tkwargs).
# GPU index 3 is the preferred device, but torch.cuda.is_available() only says that
# *some* GPU exists; on machines with fewer than four GPUs "cuda:3" is an invalid
# device ordinal, so fall back to the first GPU there, and to the CPU without CUDA.
if torch.cuda.is_available():
    _cuda_index = 3 if torch.cuda.device_count() > 3 else 0
    _device = torch.device(f"cuda:{_cuda_index}")
else:
    _device = torch.device("cpu")

tkwargs = {
    "dtype": torch.double,
    "device": _device,
}
# Raw value of the SMOKE_TEST environment variable (None when unset); any truthy
# value is used further down to reduce the number of MC samples.
SMOKE_TEST = os.environ.get("SMOKE_TEST")

### Problem setup


In [6]:
# Both dataset files live in a `datasets` folder two levels above the working directory.
target_path, domain_path = (
    os.path.join(os.getcwd(), '..', '..', 'datasets', file_name)
    for file_name in ('caco_target.pt', 'caco_domain.pt')
)

# Rows of `domain` are used below as candidate inputs and the matching rows of
# `target` as their observed objective values.
# NOTE(review): torch.load unpickles file contents - only load files you trust.
target, domain = torch.load(target_path), torch.load(domain_path)
import random
def problem(X: torch.Tensor, tensor: torch.Tensor) -> int:
    """Return the index of the row of ``tensor`` whose entries all equal ``X``.

    Args:
        X: Query row, compared against every row of ``tensor`` via broadcasting
            (e.g. a ``1 x d`` or length-``d`` tensor).
        tensor: ``n x d`` tensor of rows to search.

    Returns:
        The matching row index as a Python int. When several rows are identical
        to ``X``, the last one is returned.

    Raises:
        IndexError: If no row of ``tensor`` equals ``X``.
    """
    # Row-wise comparison: a row matches only if every column matches.
    matches = (tensor == X).all(dim=1)
    match_indices = torch.where(matches)[0]

    # Fail with an explicit message instead of an opaque
    # "index -1 is out of bounds" error from indexing an empty tensor.
    if match_indices.numel() == 0:
        raise IndexError("problem(): X does not match any row of the given tensor")

    return match_indices[-1].item()


d, M = 2000, 3

# Column-wise extent of the candidate pool: row 0 holds the per-column minimum,
# row 1 the per-column maximum.
bounds = torch.zeros(2, domain.shape[1])
bounds[0] = domain.min(dim=0).values
bounds[1] = domain.max(dim=0).values

In [219]:
# Slack helper used to derive the constraint values from the objectives.
def evaluate_slack(Y, ref = torch.tensor([0.5, 80, -5]).to(**tkwargs)):
    """Sum the element-wise differences ``Y - ref`` over the last dimension.

    Args:
        Y: Tensor of outcome values whose last dimension broadcasts against ``ref``.
        ref: Reference values subtracted from ``Y``. The default tensor is built
            once, when the function is defined.

    Returns:
        Tensor with the last dimension reduced to size 1 (``keepdim=True``).
        Callers negate this value to obtain the constraint column.
    """
    return torch.sum(Y - ref, dim=-1, keepdim=True)


def generate_initial_data(n):
    """Draw ``n`` distinct random rows from the global ``domain`` / ``target`` pool.

    Row indices come from Python's ``random.sample`` (sampling without replacement).

    Returns:
        Tuple ``(train_x, train_obj, train_con)``: the selected rows of ``domain``,
        the matching rows of ``target``, and the negated slack of those objectives.
    """
    sampled_rows = random.sample(range(target.shape[0]), n)
    sampled_obj = target[sampled_rows, :]
    # negative values imply feasibility in botorch
    sampled_con = -evaluate_slack(sampled_obj)
    return domain[sampled_rows, :], sampled_obj, sampled_con

In [220]:
from botorch.models.gp_regression import SingleTaskGP
from botorch.models.model_list_gp_regression import ModelListGP
from botorch.models.transforms.outcome import Standardize
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.transforms import normalize, unnormalize
from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
from gauche.kernels.fingerprint_kernels.tanimoto_kernel import TanimotoKernel
from gpytorch.kernels import ScaleKernel
# Kernel configuration for the GP models: a Tanimoto base kernel wrapped in a ScaleKernel.
base = TanimotoKernel()
covar_module = ScaleKernel(base_kernel=base)


def initialize_model(train_x, train_obj, train_con):
    """Build one fixed-noise GP per outcome column and combine them in a ModelListGP.

    The objective columns and the constraint column are concatenated, so the last
    sub-model of the returned model list is the constraint model.

    Args:
        train_x: Training inputs, one row per observation.
        train_obj: Observed objective values, one row per observation.
        train_con: Constraint values, concatenated after the objectives along
            the last dimension.

    Returns:
        Tuple ``(mll, model)`` with the ``SumMarginalLogLikelihood`` to fit and the
        ``ModelListGP`` it belongs to.
    """
    # define models for objective and constraint
    train_y = torch.cat([train_obj, train_con], dim=-1)
    models = []
    for i in range(train_y.shape[-1]):
        train_y_i = train_y[..., i : i + 1]
        models.append(
            SingleTaskGP(
                train_x,
                train_y_i,
                # Fixed observation noise, created with the dtype/device of the
                # targets rather than torch's defaults (float32 on the CPU).
                train_Yvar=torch.full_like(train_y_i, 0.005**2),
                outcome_transform=Standardize(m=1),
                # Every model gets its own copy of the kernel template. Passing the
                # module-level instance directly would tie the kernel hyperparameters
                # of all outputs together and carry fitted values over into the
                # models built on later iterations.
                covar_module=copy.deepcopy(covar_module),
            )
        )
    model = ModelListGP(*models)
    mll = SumMarginalLogLikelihood(model.likelihood, model)
    return mll, model

In [222]:
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.acquisition.objective import GenericMCObjective
from botorch.acquisition.multi_objective.monte_carlo import (
    qNoisyExpectedHypervolumeImprovement,
)
from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective
from botorch.optim.optimize import optimize_acqf_discrete
from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization
from botorch.utils.sampling import sample_simplex


BATCH_SIZE = 1
NUM_RESTARTS = 4
RAW_SAMPLES = 4

# 2 x 2 bounds tensor: row 0 is all zeros (lower), row 1 is all ones (upper).
standard_bounds = torch.stack(
    [torch.zeros(2, **tkwargs), torch.ones(2, **tkwargs)]
)

c = 0
def optimize_qparego_and_get_observation(model, train_obj, train_con, sampler):
    """Run one qParEGO step over the discrete candidate pool.

    Samples one random weight vector from the simplex, builds an augmented
    Chebyshev scalarization of the objectives, maximizes a constrained
    qExpectedImprovement over the rows of the global ``domain`` tensor (a single
    candidate, ``q=1``), and looks up the observation of the selected row in the
    global ``target`` tensor.

    Args:
        model: Model whose last output is the constraint and whose other outputs
            are the objectives.
        train_obj: Objective values observed so far, one row per observation.
        train_con: Constraint values observed so far (negative means feasible).
        sampler: MC sampler handed to the acquisition function.

    Returns:
        Tuple ``(new_x, new_obj, new_con, acq_v)``: the selected candidate, its
        objective values, its constraint value and the acquisition value.
    """
    # sample random weights - one per objective column instead of a hard-coded 3
    num_objectives = train_obj.shape[-1]
    weights = sample_simplex(num_objectives, **tkwargs).squeeze()
    # construct augmented Chebyshev scalarization
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)
    # initialize the scalarized objective (w/o constraints)
    scalarized_objective = GenericMCObjective(
        # the last element of the model outputs is the constraint
        lambda Z, X: scalarization(Z[..., :-1]),
    )
    train_y = torch.cat([train_obj, train_con], dim=-1)
    acq_func = qExpectedImprovement(  # pyre-ignore: [28]
        model=model,
        objective=scalarized_objective,
        best_f=scalarized_objective(train_y).max(),
        constraints=[lambda Z: Z[..., -1]],
        sampler=sampler,
    )
    # optimize over the discrete pool; q stays 1 because the lookup below
    # resolves exactly one candidate row
    candidates, acq_v = optimize_acqf_discrete(
        acq_function=acq_func,
        choices=domain,
        q=1,
    )
    # observe new values
    new_x = candidates
    new_obj = target[problem(new_x, domain), :].unsqueeze(0)
    # negative values imply feasibility in botorch
    new_con = -evaluate_slack(new_obj)
    return new_x, new_obj, new_con, acq_v

In [None]:
import time
import warnings
from metrics import HV
from botorch import fit_gpytorch_mll
from botorch.exceptions import BadInitialCandidatesWarning
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.utils.multi_objective.hypervolume import Hypervolume
from botorch.utils.multi_objective.pareto import is_non_dominated
# NOTE(review): the blanket filter silences every warning, which makes the two
# category-specific filters below redundant; kept to preserve the notebook output.
warnings.filterwarnings('ignore')
print('O'*50)
warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
random_seeds = [83810, 14592, 3278, 97196, 36048, 32098, 29256, 18289, 96530, 13434, 88696, 97080, 71482, 11395, 77397, 55302, 4165, 3905, 12280, 28657, 30495, 66237, 78907, 3478, 73563,
26062, 93850, 85181, 91924, 71426, 54987, 28893, 58878, 77236, 36463, 851, 99458, 20926, 91506, 55392, 44597, 36421, 20379, 28221, 44118, 13396, 12156, 49797, 12676, 47052]
N_BATCH =60
MC_SAMPLES = 128 if not SMOKE_TEST else 16
verbose = True
# Reference point for every hypervolume computation; loop-invariant, so built once.
ref_point = torch.tensor([0.5, 80, -5]).to(**tkwargs)

# `c` counts the runs (1-based) and is meant for the result file names below.
for c, seed in enumerate(random_seeds[:10], start=1):
    # The inner loop prunes the global pool, so every run starts from a fresh copy.
    # Use the same files as the initial load instead of copies in the working directory.
    target = torch.load(target_path)
    domain = torch.load(domain_path)

    # Seed both generators: torch for the model/acquisition sampling and Python's
    # `random`, which generate_initial_data uses to pick rows.
    torch.manual_seed(seed)
    random.seed(seed)
    hvs_qparego, hvs_random = [], []

    # call helper functions to generate initial training data and initialize model
    train_x_qparego, train_obj_qparego, train_con_qparego = generate_initial_data(
        n= 64
    )
    mll_qparego, model_qparego = initialize_model(
        train_x_qparego, train_obj_qparego, train_con_qparego
    )

    # the random baseline starts from the same initial design
    train_x_random, train_obj_random, train_con_random = (
        train_x_qparego,
        train_obj_qparego,
        train_con_qparego,
    )

    # hypervolume of the initial design
    volume = HV(train_obj_qparego, ref_point)

    hvs_qparego.append(volume)
    hvs_random.append(volume)
    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):
        t0 = time.monotonic()

        # fit the models
        fit_gpytorch_mll(mll_qparego)

        # define the qParEGO acquisition module using a QMC sampler
        qparego_sampler = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))
        # qnehvi_sampler = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))

        # optimize acquisition functions and get new observations
        (
            new_x_qparego,
            new_obj_qparego,
            new_con_qparego,
            new_acq_v_qparego
        ) = optimize_qparego_and_get_observation(
            model_qparego, train_obj_qparego, train_con_qparego, qparego_sampler
        )

        # random baseline: draw from the current (pruned) pool
        new_x_random, new_obj_random, new_con_random = generate_initial_data(n=BATCH_SIZE)

        # update training points
        train_x_qparego = torch.cat([train_x_qparego, new_x_qparego])
        train_obj_qparego = torch.cat([train_obj_qparego, new_obj_qparego])
        train_con_qparego = torch.cat([train_con_qparego, new_con_qparego])

        train_x_random = torch.cat([train_x_random, new_x_random])
        train_obj_random = torch.cat([train_obj_random, new_obj_random])
        train_con_random = torch.cat([train_con_random, new_con_random])

        # update progress
        for hvs_list, train_obj in zip(
            (hvs_random, hvs_qparego),
            (train_obj_random, train_obj_qparego)
        ):
            # hypervolume of everything observed so far
            volume = HV(train_obj, ref_point)
            hvs_list.append(volume)
        if verbose:
            print(f'round{iteration}: ', 'qparego: ', hvs_qparego[-1], 'random: ',hvs_random[-1] )
        ###prune candidate###
        # drop the row selected by qParEGO so it cannot be proposed again
        mask = torch.ones(target.shape[0], dtype=torch.bool)
        mask[problem(new_x_qparego, domain)] = False
        domain = domain[mask, :]
        target = target[mask, :]
        ####################

        # reinitialize the models so they are ready for fitting on next iteration
        # Note: we find improved performance from not warm starting the model hyperparameters
        # using the hyperparameters from the previous iteration
        mll_qparego, model_qparego = initialize_model(
            train_x_qparego, train_obj_qparego, train_con_qparego
        )
    # torch.save(torch.tensor(hvs_qparego), f'hv_qparego_caco_{c}.pt')
    # torch.save(torch.tensor(train_obj_qparego), f'obj_qparego_caco_{c}.pt')
    print('O', end='')
    