In [1]:
from pathlib import Path

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyro
import pyro.contrib.gp as gp
import pyro.distributions as dist
import seaborn as sns
import torch
from sklearn.model_selection import train_test_split

# Use the "ggplot" matplotlib style for every figure in this notebook.
plt.style.use("ggplot")
# Seed Pyro's and NumPy's global RNGs with the same fixed value so runs are repeatable.
pyro.set_rng_seed(3317)
np.random.seed(3317)


In [2]:
def g(x):
    """
    Evaluate the target function g(x) = -(sin(6*pi*x)^2) + 6x^2 - 5x^4 + 3/2.

    :param x: Tensor of input locations (evaluated element-wise).
    :return: Tensor of the same shape as ``x`` holding g(x).
    """
    sin_squared = torch.sin(6 * torch.pi * x) ** 2
    quadratic = 6 * x**2
    quartic = 5 * x**4
    # Keep the original left-to-right summation order so results are bit-identical.
    return -sin_squared + quadratic - quartic + 3 / 2


In [3]:
def generate_data(l=30, split=1.0 / 3.0):
    """
    Sample noisy observations of g on an evenly spaced grid over [0, 1] and
    split them into a training and a test set.

    The observation noise is Gaussian with variance 0.01. The split itself uses
    a fixed ``random_state``, so only the noise differs between calls.

    :param l: Number of grid points (total number of observations).
    :param split: Fraction of the observations held out as test data.
    :return: Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    grid = (torch.arange(1, l + 1) - 1) / (l - 1)
    noise_std = torch.sqrt(torch.tensor(0.01))
    observations = g(grid) + noise_std * torch.randn(len(grid))

    parts = train_test_split(grid, observations, test_size=split, random_state=3317)
    # train_test_split returns (x_train, x_test, y_train, y_test); reorder for callers.
    x_train, x_test, y_train, y_test = parts

    return x_train, y_train, x_test, y_test


In [4]:
def log_likelihood(x, y, posterior_samples):
    """
    Gaussian-process log marginal likelihood of ``(x, y)`` evaluated at the
    posterior-mean hyperparameters.

    Every hyperparameter is collapsed to the mean of its posterior draws, a
    Product(RBF, Periodic) kernel is rebuilt from those point estimates, and
    ``y`` is scored under a zero-mean Gaussian with covariance
    ``K(x, x) + noise * I``:

        log p(y | x) = -0.5 * (y^T K^-1 y + log|K| + n * log(2*pi))

    :param x: Tensor of input locations, in the form accepted by the kernels
        (constructed with ``input_dim=1``).
    :param y: 1-D tensor of observed targets, one per input location.
    :param posterior_samples: Mapping from site name to a tensor of posterior
        draws. Must contain ``kernel.kern0.lengthscale``,
        ``kernel.kern0.variance``, ``kernel.kern1.lengthscale``,
        ``kernel.kern1.period``, ``kernel.kern1.variance`` and ``noise``.
    :return: Scalar tensor with the log-likelihood (higher is better).
    """
    # Point estimates: posterior mean of every hyperparameter.
    rbf_l = torch.mean(posterior_samples["kernel.kern0.lengthscale"])
    rbf_v = torch.mean(posterior_samples["kernel.kern0.variance"])
    per_l = torch.mean(posterior_samples["kernel.kern1.lengthscale"])
    per_p = torch.mean(posterior_samples["kernel.kern1.period"])
    per_v = torch.mean(posterior_samples["kernel.kern1.variance"])
    noise = torch.mean(posterior_samples["noise"])

    rbf = gp.kernels.RBF(input_dim=1, variance=rbf_v, lengthscale=rbf_l)
    periodic = gp.kernels.Periodic(
        input_dim=1, period=per_p, lengthscale=per_l, variance=per_v
    )
    kernel = gp.kernels.Product(kern0=rbf, kern1=periodic)

    n_samples = len(x)
    K = kernel.forward(x)
    covariance = K + noise * torch.eye(n_samples, dtype=K.dtype, device=K.device)

    # Work with the Cholesky factor (covariance = L @ L.T, L lower triangular)
    # for numerical stability and speed instead of inverting the matrix.
    L = torch.linalg.cholesky(covariance)

    # alpha = covariance^-1 @ y; squeeze only the column axis so a single
    # observation still yields a 1-D vector for torch.dot.
    alpha = torch.cholesky_solve(y.reshape(-1, 1), L).squeeze(-1)

    # det(covariance) = prod(diag(L))^2, hence log|covariance| = 2 * sum(log(diag(L))).
    log_det = 2.0 * torch.sum(torch.log(torch.diagonal(L)))

    log_lik = -0.5 * (
        torch.dot(y, alpha)
        + log_det
        + n_samples * torch.log(torch.tensor(2.0 * torch.pi))
    )

    return log_lik


In [None]:
# Fit one GP regression model with NUTS; the resulting `mcmc` object is what the
# diagnostic cells below inspect.
x_train, y_train, x_test, y_test = generate_data()

# Defining our kernels and GP-model
# The tensor values passed to the constructors are only starting values: each of
# these attributes is reassigned to a prior further down.
rbf = gp.kernels.RBF(
    input_dim=1, variance=torch.tensor(1.0), lengthscale=torch.tensor(0.9)
)
periodic = gp.kernels.Periodic(
    input_dim=1,
    period=torch.tensor(0.5),
    lengthscale=torch.tensor(1.0),
    variance=torch.tensor(1.0),
)
# kern0 = RBF, kern1 = Periodic; these names determine the sample-site names
# ("kernel.kern0.*", "kernel.kern1.*") that log_likelihood reads.
kernel = gp.kernels.Product(kern0=rbf, kern1=periodic)
gpr = gp.models.GPRegression(x_train, y_train, kernel=kernel, noise=torch.tensor(0.01))

# Putting priors on our kernel parameters
gpr.kernel.kern0.lengthscale = pyro.nn.PyroSample(dist.LogNormal(0.5, 1.0))
gpr.kernel.kern0.variance = pyro.nn.PyroSample(dist.LogNormal(0.0, 1.0))
# Periodic kernel
# The period prior is a tight LogNormal located at log(1/6); 1/6 is the period of
# the sin(6*pi*x)^2 term in g.
gpr.kernel.kern1.period = pyro.nn.PyroSample(
    dist.LogNormal(torch.log(torch.tensor(1 / 6)), 0.1)
)
gpr.kernel.kern1.lengthscale = pyro.nn.PyroSample(dist.LogNormal(1.4, 1.0))
gpr.kernel.kern1.variance = pyro.nn.PyroSample(dist.LogNormal(0.0, 1.0))
# Prior on the observation-noise variance (generate_data uses a noise variance of 0.01).
gpr.noise = pyro.nn.PyroSample(dist.Gamma(1, 100))

# Sample the hyperparameter posterior with NUTS: 2 chains, warmup_steps=500, num_samples=500.
nuts_kernel = pyro.infer.NUTS(gpr.model, jit_compile=True)
mcmc = pyro.infer.MCMC(nuts_kernel, num_samples=500, num_chains=2, warmup_steps=500)
mcmc.run()


In [None]:
# Collect the posterior draws and display the mean lengthscale of kern0 (the RBF kernel).
mcmc_samples = mcmc.get_samples()
torch.mean(mcmc_samples["kernel.kern0.lengthscale"])

### Load data into ArviZ


In [None]:
# Convert the fitted Pyro MCMC object into an ArviZ object for the plots and summary below.
data = az.from_pyro(mcmc)


In [None]:
# Trace plots for every sampled hyperparameter.
az.plot_trace(data)
plt.tight_layout()
# Uncomment to write the figure to disk:
# plt.savefig("arviz_trace.png", dpi=600)


In [None]:
# Posterior plots for every sampled hyperparameter.
az.plot_posterior(data)
plt.tight_layout()
# Uncomment to write the figure to disk:
# plt.savefig("arviz_posterior.png", dpi=400)


In [None]:
# Tabular ArviZ summary of the posterior; displayed as the cell output.
summary = az.summary(data)
summary


In [None]:
# Wrap the summary in a DataFrame and print it as LaTeX table source.
df = pd.DataFrame(summary)
print(df.to_latex())


In [None]:
# Repeat the experiment on freshly generated data and record, for each run, the
# test log-likelihood at the posterior-mean hyperparameters.
mcmc_test_loglikelihoods = []

iterations = 20

for iteration in range(iterations):
    print(f"Iteration {iteration + 1} / {iterations}")
    pyro.clear_param_store()
    x_train, y_train, x_test, y_test = generate_data()

    # Defining our kernels and GP-model
    # The constructor tensors are only starting values; priors replace them below.
    rbf = gp.kernels.RBF(
        input_dim=1, variance=torch.tensor(1.0), lengthscale=torch.tensor(0.9)
    )

    periodic = gp.kernels.Periodic(
        input_dim=1,
        period=torch.tensor(0.5),
        lengthscale=torch.tensor(1.0),
        variance=torch.tensor(1.0),
    )
    # Product, not Sum: log_likelihood rebuilds a Product(RBF, Periodic) kernel
    # from the posterior means, so the hyperparameters must be sampled under that
    # same kernel for the test score to refer to the fitted model.
    kernel = gp.kernels.Product(kern0=rbf, kern1=periodic)

    gpr = gp.models.GPRegression(
        x_train, y_train, kernel=kernel, noise=torch.tensor(0.01)
    )

    # Putting priors on our kernel parameters
    gpr.kernel.kern0.lengthscale = pyro.nn.PyroSample(dist.LogNormal(0.5, 1.0))
    gpr.kernel.kern0.variance = pyro.nn.PyroSample(dist.LogNormal(0.0, 1.0))
    # Periodic kernel: the period prior is located at log(1/6); 1/6 is the period
    # of the sin(6*pi*x)^2 term in g.
    gpr.kernel.kern1.period = pyro.nn.PyroSample(
        dist.LogNormal(torch.log(torch.tensor(1 / 6)), 0.1)
    )
    gpr.kernel.kern1.lengthscale = pyro.nn.PyroSample(dist.LogNormal(1.4, 1.0))
    gpr.kernel.kern1.variance = pyro.nn.PyroSample(dist.LogNormal(0.0, 1.0))
    # Prior on the observation-noise variance (generate_data uses 0.01).
    gpr.noise = pyro.nn.PyroSample(dist.Gamma(1, 100))

    nuts_kernel = pyro.infer.NUTS(gpr.model, jit_compile=True, adapt_step_size=True)
    mcmc = pyro.infer.MCMC(nuts_kernel, num_samples=500, num_chains=2, warmup_steps=500)
    mcmc.run()

    # NOTE(review): two chains are run but only 500 draws are requested here;
    # confirm whether all pooled draws should feed the posterior means instead.
    posterior_samples = mcmc.get_samples(num_samples=500)

    mcmc_test_loglikelihoods.append(log_likelihood(x_test, y_test, posterior_samples))


In [None]:
# Convert every stored log-likelihood to a plain Python float.
# float() accepts both scalar tensors and floats, so re-running this cell after
# the list has already been converted does not raise.
mcmc_test_loglikelihoods = [float(value) for value in mcmc_test_loglikelihoods]
print(mcmc_test_loglikelihoods)


In [14]:
# Hard-coded test log-likelihoods for the MAP baseline (20 values, one per run).
# NOTE(review): presumably copied from a separate MAP experiment — confirm provenance.
map_log_likelihood = [
    -11.739569664001465,
    -10.372503280639648,
    -10.630261421203613,
    -11.856077194213867,
    -18.365325927734375,
    -14.903423309326172,
    -10.197603225708008,
    -10.638860702514648,
    -12.914301872253418,
    -9.359822273254395,
    -10.395574569702148,
    -11.220866203308105,
    -11.83434009552002,
    -9.190084457397461,
    -15.740955352783203,
    -10.94947624206543,
    -10.125082015991211,
    -21.8223934173584,
    -13.305448532104492,
    -14.780899047851562,
]

In [None]:
# Mean and sample standard deviation (ddof=1) of the test log-likelihoods per method.
print("MAP:", np.mean(map_log_likelihood), np.std(map_log_likelihood, ddof=1))
print(
    "NUTS:", np.mean(mcmc_test_loglikelihoods), np.std(mcmc_test_loglikelihoods, ddof=1)
)


In [None]:
# Box plot comparing the test log-likelihoods obtained with MAP and with NUTS.
fig, ax = plt.subplots(figsize=(7, 5))

# Long-format frame: one row per log-likelihood value, labelled with its method.
df = pd.DataFrame(
    {
        "Values": np.concatenate([map_log_likelihood, mcmc_test_loglikelihoods]),
        "Method": ["MAP"] * len(map_log_likelihood)
        + ["NUTS"] * len(mcmc_test_loglikelihoods),
    }
)

sns.boxplot(x="Method", y="Values", data=df, palette=["red", "blue"], ax=ax)
ax.set_title("Comparing MAP and NUTS posterior test log-likelihood")
ax.set_ylabel("log-likelihood")

# Create the output folder first: saving into a missing directory raises
# FileNotFoundError, which would otherwise only surface after the long MCMC runs.
Path("./figures").mkdir(parents=True, exist_ok=True)
fig.savefig("./figures/compare_loglikelihood_map_nuts_boxplot.png", dpi=600)
plt.show()