The SM (spectral mixture) kernel works only under very limited conditions

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import copy
import torch
import gpytorch
import botorch

# import scripts
from scripts.transformation import transformer
from scripts.transformation import fake_api
from scripts.transformation import api_utils
from scripts.exp_helper import exp_helper
from scripts.Botorch_opt import bayesian_optimiser
from scripts.function_slicer import slicer

dtype = torch.float

##### data

In [2]:
# Raw position table: 4 rows x 5 integer entries.
position = np.array([ 
    [10,21,39,12,14],
    [42,52,63,11,54],
    [71,82,99,42,57],
    [15,24,67,78,93],
]) 
# Row totals of the position table (not used again in the cells shown here).
N = np.sum(position,axis=1)
# Change of variable; the displayed result below is 4x4, i.e. one column
# fewer than `position`.
x0 = transformer.x_to_y(position) # change of variable
# Bare expression so the notebook displays the transformed array.
x0

array([[0.10416667, 0.24418605, 0.6       , 0.46153846],
       [0.18918919, 0.28888889, 0.4921875 , 0.16923077],
       [0.2022792 , 0.29285714, 0.5       , 0.42424242],
       [0.05415162, 0.09160305, 0.28151261, 0.45614035]])

In [3]:
# Initial margin evaluated on all flattened entries of x0.
# The call returns a list (a single value in the output shown below).
fake_api.fake_margin(x0.flatten())

[11.026142125881833]

In [4]:
# time horizon
T = 5
ndim = 8 # input dimension

# Initial samples: assume 1 data point with an ndim-dimensional input.
# Only the first `ndim` entries of the flattened x0 are kept.
x = torch.from_numpy(x0.flatten()[:ndim]).float().view(-1,ndim) # shape [n,d]
# Target of the initial point is -1; `api` returns -(margin/m0), which is -1
# when margin == m0. Sign is flipped because min f = max -f.
y = torch.tensor(-1,dtype=dtype).view(-1,1) # shape [n,1]; min f = max -f
# Baseline margin at the initial ndim-dimensional point (first returned value).
m0 = fake_api.fake_margin(x0.flatten()[:ndim])[0]

# Two identical (x, y, m0) records; not used again in the cells shown here.
data = [(x,y,m0), (x,y,m0)]

def api(x, m0):
    """Query the fake margin API and return negated, m0-normalised margins.

    Args:
        x: Torch tensor of query inputs; converted with ``.numpy()`` before
            being handed to ``fake_api.fake_margin``.
        m0: Baseline margin that every returned margin is divided by.

    Returns:
        Tensor of shape [n, 1] (module-level ``dtype``) holding
        ``-(margin / m0)`` for each margin returned by the API.
    """
    margins = fake_api.fake_margin(x.numpy())
    # Negate so that maximising this value minimises the margin.
    scaled = [-(margin / m0) for margin in margins]
    return torch.tensor(scaled, dtype=dtype).view(-1, 1)
        
# Sanity check: show the initial input (with its shape), its target value
# and the baseline margin m0.
print(x, x.size())
print(y)
print(m0)

tensor([[0.1042, 0.2442, 0.6000, 0.4615, 0.1892, 0.2889, 0.4922, 0.1692]]) torch.Size([1, 8])
tensor([[-1.]])
10.227246439833696


##### BO

In [11]:
# Add a leading batch dimension of size 1: [n, d] -> [1, n, d] and
# [n, 1] -> [1, n, 1]. repeat(1,1,1) keeps the shape unchanged (it only
# copies the data).
X_train = x.unsqueeze(0).repeat(1,1,1)
y_train = y.unsqueeze(0).repeat(1,1,1)
print(X_train.size(),y_train.size())

torch.Size([1, 1, 8]) torch.Size([1, 1, 1])


In [12]:
def init_model(x, y, covar: callable, state_dict=None):
    """Build a SingleTaskGP with a fixed small noise level and a custom kernel.

    Args:
        x: Training inputs handed to ``SingleTaskGP``.
        y: Training targets handed to ``SingleTaskGP``.
        covar: Factory called as ``covar(x)``; its result replaces the
            model's default ``covar_module``.
        state_dict: Optional state dict loaded into the model after it has
            been constructed.

    Returns:
        Tuple ``(mll, model)`` where ``mll`` is the exact marginal log
        likelihood built from the model and its likelihood.
    """
    # Pin the observation noise at 1e-4 and switch off its gradient so that
    # gradient-based fitting does not update it.
    noise_model = gpytorch.likelihoods.GaussianLikelihood()
    noise_model.noise = 1e-4
    noise_model.noise_covar.raw_noise.requires_grad_(False)

    gp = botorch.models.SingleTaskGP(x, y, noise_model)
    gp.covar_module = covar(x)
    marginal_ll = gpytorch.mlls.ExactMarginalLogLikelihood(gp.likelihood, gp)

    if state_dict is None:
        return marginal_ll, gp
    gp.load_state_dict(state_dict)
    return marginal_ll, gp

def fit_model(mll):
    """Fit the model attached to ``mll`` with BoTorch's fitting routine.

    The value returned by ``botorch.fit_gpytorch_model`` is discarded; this
    function returns ``None``.
    """
    botorch.fit_gpytorch_model(mll)

def covar(x):
    """Create a 3-mixture spectral mixture kernel sized to the inputs.

    Args:
        x: Input tensor; its last dimension sets ``ard_num_dims``.

    Returns:
        A ``SpectralMixtureKernel`` with batch shape ``[1]``.
    """
    input_dim = x.size(-1)
    return gpytorch.kernels.SpectralMixtureKernel(
        num_mixtures=3,
        ard_num_dims=input_dim,
        batch_shape=torch.Size([1]),
    )

In [13]:
def init_acqu_func(model):
    """Wrap ``model`` in an analytic Upper Confidence Bound acquisition.

    The acquisition is built with ``beta=1`` and ``objective=None``.
    """
    ucb_options = {"model": model, "beta": 1, "objective": None}
    return botorch.acquisition.analytic.UpperConfidenceBound(**ucb_options)

def inner_loop(acq_func, batch_size, bounds):
    """Optimise the acquisition function once and return the proposed query.

    Args:
        acq_func: Acquisition function passed to ``optimize_acqf``.
        batch_size: Number of candidates requested (``q``).
        bounds: Box bounds passed through to ``optimize_acqf``.

    Returns:
        The candidate tensor, detached from the autograd graph.
    """
    result = botorch.optim.optimize_acqf(
        acq_function=acq_func,
        bounds=bounds,
        q=batch_size,
        num_restarts=1,    # a single optimisation restart
        raw_samples=1,     # a single raw sample for initialisation
        sequential=False,  # joint rather than sequential optimisation
    )
    # optimize_acqf returns a pair; only the candidates (first item) are used.
    return result[0].detach()

In [14]:
# Search box: lower bound 0 and upper bound 1 in each of the ndim dimensions.
bounds = torch.tensor([[0.0] * ndim, [1.0] * ndim], dtype = dtype)
# Number of candidates requested per acquisition optimisation.
q=1

# Build the GP (with the spectral mixture kernel from `covar`) and its MLL.
mll, model = init_model(X_train, y_train, covar, state_dict=None)

# Fit with BoTorch's built-in fitting routine.
fit_model(mll)

acq = init_acqu_func(model)

# One acquisition optimisation -> next query point.
query = inner_loop(acq, q, bounds)

print(query)

tensor([[0.5147, 0.9682, 0.1045, 0.0400, 0.0487, 0.2555, 0.6755, 0.8857]])


In [170]:
# Add a leading batch dimension of size 1: [n, d] -> [1, n, d] and
# [n, 1] -> [1, n, 1]. repeat(1,1,1) keeps the shape unchanged (it only
# copies the data).
X_train = x.unsqueeze(0).repeat(1,1,1)
y_train = y.unsqueeze(0).repeat(1,1,1)
print(X_train.size(),y_train.size())

torch.Size([1, 1, 8]) torch.Size([1, 1, 1])


In [171]:
def init_model(x, y, covar: callable, state_dict=None):
    """Build a SingleTaskGP with a fixed small noise level and a custom kernel.

    Args:
        x: Training inputs handed to ``SingleTaskGP``.
        y: Training targets handed to ``SingleTaskGP``.
        covar: Factory called as ``covar(x)``; its result replaces the
            model's default ``covar_module``.
        state_dict: Optional state dict loaded into the model after it has
            been constructed.

    Returns:
        Tuple ``(mll, model)`` where ``mll`` is the exact marginal log
        likelihood built from the model and its likelihood.
    """
    # Pin the observation noise at 1e-4 and switch off its gradient so that
    # gradient-based fitting does not update it.
    noise_model = gpytorch.likelihoods.GaussianLikelihood()
    noise_model.noise = 1e-4
    noise_model.noise_covar.raw_noise.requires_grad_(False)

    gp = botorch.models.SingleTaskGP(x, y, noise_model)
    gp.covar_module = covar(x)
    marginal_ll = gpytorch.mlls.ExactMarginalLogLikelihood(gp.likelihood, gp)

    if state_dict is None:
        return marginal_ll, gp
    gp.load_state_dict(state_dict)
    return marginal_ll, gp

def fit_model(mll):
    """Fit the model attached to ``mll`` with BoTorch's fitting routine.

    The value returned by ``botorch.fit_gpytorch_model`` is discarded; this
    function returns ``None``.
    """
    botorch.fit_gpytorch_model(mll)

def covar(x):
    """Create a 3-mixture spectral mixture kernel sized to the inputs.

    Args:
        x: Input tensor; its last dimension sets ``ard_num_dims``.

    Returns:
        A ``SpectralMixtureKernel`` with batch shape ``[1]``.
    """
    input_dim = x.size(-1)
    return gpytorch.kernels.SpectralMixtureKernel(
        num_mixtures=3,
        ard_num_dims=input_dim,
        batch_shape=torch.Size([1]),
    )

In [172]:
def fit_model2(mll, model, x, y, epoch=2, lr=0.1):
    """Fit the model with a short, manual Adam loop on the negative MLL.

    Args:
        mll: Callable invoked as ``mll(output, y)``; its result is indexed
            with ``[0]`` to obtain the scalar that is negated and minimised.
        model: Module to train. It must provide ``parameters()``,
            ``train()``, ``eval()`` and a ``likelihood`` attribute that
            also has ``train()``/``eval()``.
        x: Training inputs passed to ``model``.
        y: Training targets; the trailing dimension is squeezed away before
            they are passed to ``mll``.
        epoch: Number of optimisation steps. Defaults to 2, the value that
            was previously hard-coded.
        lr: Adam learning rate. Defaults to 0.1, the value that was
            previously hard-coded.

    Returns:
        None. The model is updated in place and left in eval mode.
    """
    model.train()
    model.likelihood.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    y = y.squeeze(-1)

    print("start training")
    print(x.size(), y.size())

    last_step, loss = None, None
    for last_step in range(epoch):
        optimizer.zero_grad()
        output = model(x)
        # Only the first entry of the MLL output is used as the loss; the
        # notebook trains with a batch dimension of size 1.
        loss = -mll(output, y)[0]
        loss.backward()
        optimizer.step()

    # Report the final step only, and only if at least one step was taken
    # (with epoch == 0 there is no loss to report).
    if loss is not None:
        print("Iter %d - Loss: %.3f" % (last_step, loss.item()))

    model.eval()
    model.likelihood.eval()

In [178]:
def init_acqu_func(model):
    """Wrap ``model`` in an analytic Upper Confidence Bound acquisition.

    The acquisition is built with ``beta=1`` and ``objective=None``.
    """
    ucb_options = {"model": model, "beta": 1, "objective": None}
    return botorch.acquisition.analytic.UpperConfidenceBound(**ucb_options)

def inner_loop(acq_func, batch_size, bounds):
    """Optimise the acquisition function once and return the proposed query.

    Args:
        acq_func: Acquisition function passed to ``optimize_acqf``.
        batch_size: Number of candidates requested (``q``).
        bounds: Box bounds passed through to ``optimize_acqf``.

    Returns:
        The candidate tensor, detached from the autograd graph.
    """
    result = botorch.optim.optimize_acqf(
        acq_function=acq_func,
        bounds=bounds,
        q=batch_size,
        num_restarts=1,    # a single optimisation restart
        raw_samples=1,     # a single raw sample for initialisation
        sequential=False,  # joint rather than sequential optimisation
    )
    # optimize_acqf returns a pair; only the candidates (first item) are used.
    return result[0].detach()

In [179]:
# Search box: lower bound 0 and upper bound 1 in each of the ndim dimensions.
bounds = torch.tensor([[0.0] * ndim, [1.0] * ndim], dtype = dtype)

# Build the GP (with the spectral mixture kernel from `covar`) and its MLL.
mll, model = init_model(X_train, y_train, covar, state_dict=None)

# Fit with the hand-written Adam loop instead of BoTorch's fitting routine.
fit_model2(mll, model, X_train,y_train)

acq = init_acqu_func(model)

# NOTE: `q` is not defined in this cell; it relies on the `q=1` assignment
# made in an earlier cell.
query = inner_loop(acq, q, bounds)

print(query)

start training
torch.Size([1, 1, 8]) torch.Size([1, 1])
Iter 1 - Loss: 1.458
tensor([[0.9976, 0.7974, 0.8311, 0.7608, 0.1730, 0.1412, 0.7551, 0.1114]])
