In [1]:
import torch
import tqdm.notebook as tqdm

In [24]:
## Constants
seed = 0  # fixed RNG seed so that running the cells in order reproduces the same draws
n = 10    # number of training points
d = 10    # input dimensionality
ls = 1.   # lengthscale used by the RBF kernel

## Create dataset
# Seed the global generator before the first random draw; every later
# torch.randn call (dataset, optimizer restarts) draws from this generator.
torch.manual_seed(seed)
X = torch.randn(n, d)
# Targets are absolute values of standard normal draws, hence non-negative.
y = torch.randn(n).abs()

## RBF kernel
def kernel(x1, x2):
    """Evaluate the RBF kernel between every point of `x1` and every point of `x2`.

    Args:
        x1: tensor of shape (..., n1, d).
        x2: tensor of shape (..., n2, d); leading batch dimensions broadcast
            against those of `x1`.

    Returns:
        Tensor of shape (..., n1, n2) with entries
        exp(-||x1_i - x2_j||^2 / (2 * d * ls^2)), where `d` is the size of the
        last dimension of `x1` and `ls` is the module-level lengthscale.
    """
    # Insert singleton axes so the subtraction pairs every x1 row with every x2 row.
    pairwise_diff = x1.unsqueeze(-2) - x2.unsqueeze(-3)
    sq_dist = pairwise_diff.square().sum(dim=-1)
    # The denominator is scaled by the feature dimension of x1.
    denom = -2. * x1.size(-1) * (ls ** 2)
    return torch.exp(sq_dist / denom)

## Terms needed for EI
# Gram matrix of the training inputs and its lower-triangular Cholesky factor.
K = kernel(X, X)
chol = torch.linalg.cholesky(K, upper=False)
# chol^{-1} y as an (n, 1) column; reused by every EI evaluation.
chol_inv_y = torch.linalg.solve_triangular(chol, y[:, None], upper=False)
# Euclidean norm of chol^{-1} y, kept with shape (1, 1) so it broadcasts later.
ynorm = torch.sqrt(chol_inv_y.square().sum(dim=(-1, -2), keepdim=True))
# Incumbent: best observed target value.
ymax = torch.max(y)
# Standard normal, used for the cdf/pdf terms of EI.
normal = torch.distributions.Normal(loc=0, scale=1)

## Optimization parameters
num_restarts, num_optim_iter = 1000, 2000

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

## 1) Maximize EI (standard, no constraints)

In [35]:
def ei(k):
    """Expected improvement over `ymax` for the posterior implied by `k`.

    Args:
        k: cross-covariance column(s) between the training inputs and a
            candidate point, shape (..., n, 1).

    Returns:
        Tensor of shape (...) holding
        (mu - ymax) * Phi(z) + sigma * phi(z) with z = (mu - ymax) / sigma.

    Uses the module-level `chol`, `chol_inv_y`, `ymax` and `normal`.
    """
    # Whitened cross-covariance: chol^{-1} k.
    whitened_k = torch.linalg.solve_triangular(chol, k, upper=False)
    # Posterior mean: (chol^{-1} k)^T (chol^{-1} y), reduced to a scalar per batch entry.
    mu = torch.matmul(whitened_k.mT, chol_inv_y)[..., 0, 0]
    # Posterior variance with unit prior variance; clamped so the sqrt stays finite.
    variance = 1 - whitened_k.square().sum([-1, -2])
    sigma = variance.clamp_min(1e-10).sqrt()
    improvement = mu - ymax
    z = improvement / sigma
    # Standard normal density obtained by exponentiating the log-density.
    density = normal.log_prob(z).exp()
    return improvement * normal.cdf(z) + sigma * density

# Run optimization (with multiple restarts, all run in parallel)
# Each restart is one candidate of shape (1, d), initialised from N(0, 3^2).
xs = torch.nn.Parameter(3 * torch.randn(num_restarts, 1, d))
optim = torch.optim.Adam(params=[xs], lr=0.1)
iterator = tqdm.tqdm(range(num_optim_iter))
for i in iterator:
    eis = ei(kernel(X, xs))
    # Show the best EI across restarts on the progress bar.
    iterator.set_postfix(ei=eis.max().item())
    # Gradient ascent on EI == gradient descent on the mean negated EI.
    (-eis).mean(-1).backward()
    optim.step()
    optim.zero_grad()

# Choose restart that maximizes EI
# Re-evaluated without gradients so the scores match the final parameters.
with torch.no_grad():
    ks = kernel(X, xs)
    eis = ei(ks)
    ei_max = eis.max()
    x = xs[eis.argmax()].detach()

# Print result from optimization
print(f"max_x EI: {ei_max.item()}\n")
print(f"x^* (that maximizes EI):\n{x}\n")
print(f"k(X, x^*):\n{kernel(X, x).squeeze()}\n")

  0%|          | 0/2000 [00:00<?, ?it/s]

max_x EI: 0.41078609228134155

x^* (that maximizes EI):
tensor([[ 0.9565, -0.1813,  0.4464,  0.0426,  2.0736, -2.5647, -0.3536,  0.7438,
         -0.3749,  0.1315]])

k(X, x^*):
tensor([0.1731, 0.3620, 0.7559, 0.3595, 0.1059, 0.2934, 0.4287, 0.6212, 0.5080,
        0.4374])



## 2) Choose a k vector (colinear to y) that maximizes EI

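Here k is chosen freely along the direction of y, even if no x vector can actually produce such a k; the resulting EI is therefore reported as an upper bound.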

In [44]:
# Unconstrained parameters, one per restart; the optimization loop below
# passes them through a sigmoid to obtain the actual scale factors.
gammas_ = torch.nn.Parameter(torch.randn((num_restarts, 1, 1)))
optim = torch.optim.Adam(params=[gammas_], lr=0.1)
# Progress bar consumed by the gamma optimization loop further down.
iterator = tqdm.tqdm(range(num_optim_iter))

# Function to find the x whose squared distances to the rows of X best match
# (in the least-squares sense) the squared distances implied by a given k
def unkernel(k):
    """Approximately invert the RBF kernel: find x such that kernel(X, x) is close to `k`.

    The target kernel values are first mapped back to squared distances via
    sq_dist = -2 * d * ls^2 * log(k); the candidates are then fitted by
    minimizing the sum over training points of
    (||X_i - x||^2 - sq_dist_i)^2 with Adam, using `num_restarts` random
    restarts optimized in parallel.

    Args:
        k: tensor of target kernel values, one per row of X. Entries must be
            strictly positive, since their logarithm is taken.

    Returns:
        Detached tensor of shape (1, d): the restart with the smallest final loss.

    Uses the module-level `X`, `ls`, `d`, `num_restarts` and `num_optim_iter`.
    """
    # Invert k = exp(-sq_dist / (2 * d * ls^2)) to get the target squared distances.
    sq_dists = k.log().mul(-2. * X.size(-1) * (ls ** 2))
    _xs = torch.nn.Parameter(torch.randn(num_restarts, 1, d) * 4)
    optim = torch.optim.Adam([_xs], lr=0.1)

    def restart_losses():
        # Squared distance from every candidate to every row of X: (num_restarts, n).
        _sq_dists = (X.unsqueeze(-2) - _xs.unsqueeze(-3)).square().sum(dim=-1).squeeze(-1)
        # Per-restart least-squares mismatch against the target distances.
        return (_sq_dists - sq_dists).square().sum(dim=-1)

    iterator = tqdm.tqdm(range(num_optim_iter))
    for i in iterator:
        losses = restart_losses()
        loss = losses.mean()
        loss.backward()
        iterator.set_postfix(loss=losses.min().item())
        optim.step()
        optim.zero_grad()

    # Re-evaluate after the final optimizer step: the losses computed inside the
    # loop predate that step, so ranking restarts with them would not reflect
    # the parameters actually returned. This also covers num_optim_iter == 0.
    with torch.no_grad():
        best = restart_losses().argmin()
        return _xs[best].detach()

# Run optimization (with multiple restarts, all run in parallel)
for i in iterator:
    # The sigmoid keeps gamma in (0, 1 / ynorm).
    gammas = gammas_.sigmoid() / ynorm
    # Candidate k vectors are y scaled by gamma, shaped as columns for ei().
    eis = ei(gammas * y.unsqueeze(-1))
    iterator.set_postfix(ei=eis.max().item())
    # Gradient ascent on EI == gradient descent on the mean negated EI.
    (-eis).mean(-1).backward()
    optim.step()
    optim.zero_grad()

# Choose restart that maximizes EI
# Re-evaluated without gradients so the scores match the final parameters.
with torch.no_grad():
    gammas = gammas_.sigmoid() / ynorm
    eis = ei(gammas * y.unsqueeze(-1))
    ei_max_2 = eis.max()
    gamma = gammas[eis.argmax()].detach().squeeze()
    k_2 = gamma * y
# Recover an x whose kernel vector approximates k_2.
x_2 = unkernel(k_2)

# Print result from optimization
print(f"max_x EI (upper bound): {ei_max_2.item()}\n")
print(f"gamma: {gamma.item()}")
print(f"k^* (vector colinear to y that maximizes EI(k)):\n{k_2}")
print(f"x^* (the x that most closely produces k^*):\n{x_2.squeeze()}\n")
print(f"Actual EI at x^*: {ei(kernel(X, x_2)).item()}\n")

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

max_x EI (upper bound): 1.3382394313812256

gamma: 0.3518446981906891
k^* (vector colinear to y that maximizes EI(k)):
tensor([0.0775, 0.0425, 0.5291, 0.3493, 0.1394, 0.2620, 0.1623, 0.5231, 0.4304,
        0.1375])
x^* (the x that most closely produces k^*):
tensor([-0.0631,  0.6964,  2.9375, -0.3638,  2.0657, -2.6489, -1.4593, -0.5205,
        -1.1546, -0.4600])

Actual EI at x^*: 0.14684860408306122

