In [1]:
## Pyro GP tutorial used as starting point:
## https://pyro.ai/examples/gp.html

import matplotlib.pyplot as plt
import numpy as np
import torch
import pyro
import pyro.contrib.gp as gp
import pyro.distributions as dist
import arviz

# Partition observations
X = np.asarray([x / 29 for x in range(1, 31)])
np.random.shuffle(X)
Y = 6 * np.square(X) - np.square(np.sin(6 * np.pi * X)) - 5 * np.power(X, 4) + 3 / 2 + np.random.normal(0.0, 0.1, 30)
Xtrain, Xtest, Ytrain, Ytest = torch.tensor(X[10:]), torch.tensor(X[:10]), torch.tensor(Y[10:]), torch.tensor(Y[:10])

  from .autonotebook import tqdm as notebook_tqdm


### Selecting a suitable model

In [2]:
# We chose a GP regression model and the Matern 3/2 kernel. In this setup, we have three hyper-parameters.
# I) The variance of the kernel, II) the lengthscale of the kernel, and III) the gaussian noise of the model.
# We chose to let the gaussian noise be fixed and equal to the noise of our data, while keeping the variance
# and lengthscale of the kernel variable. The prior distrubition we chose is a multivariate normal
# distribution (i.e. we consider the variance and lengthscale as normally distributed), with mean and variance
# based on what seems reasonable for the Matern 3/2 kernel, based on the lecture slides.

# Define kernel
def kernel(theta):
    return gp.kernels.Matern32(input_dim=1, variance=theta[0], lengthscale=theta[1])

# Define model
def model(xs, ys, kernel):
    return gp.models.GPRegression(xs, ys, kernel, noise=torch.tensor(0.01))

# Computes log-likelihood
def logLikelihood(xs, ys, kernel, theta):
    # See derivation in report
    t1 = 0.5 * torch.transpose(ys, 0, 0) * torch.linalg.inv(kernel.forward(xs)) * ys
    t2 = 0.5 * torch.log(torch.linalg.det(kernel.forward(xs)))
    t3 = 15.0 * torch.log(2 * torch.tensor(np.pi))
    return (- t1 - t2 - t3) * prior.log_prob(theta)

# Pick prior distributions
pyro.clear_param_store()
some_theta = [torch.tensor(1.5),torch.tensor(1)]
k = kernel(some_theta)
gpr = model(Xtrain, Ytrain, k)
gpr.kernel.lengthscale = pyro.nn.PyroSample(dist.LogNormal(0.0, 1.0))
gpr.kernel.variance = pyro.nn.PyroSample(dist.LogNormal(0.0, 1.0))

### Compute posterior predictive

In [3]:
optimizer = torch.optim.SGD(gpr.parameters(), lr=0.01)
loss_fn = pyro.infer.Trace_ELBO().differentiable_loss
losses = []
num_steps = 2000
for i in range(num_steps):
    optimizer.zero_grad()
    loss = loss_fn(gpr.model, gpr.guide)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

gpr.set_mode("guide")
print("variance = {}".format(gpr.kernel.variance))
print("lengthscale = {}".format(gpr.kernel.lengthscale))
print("noise = {}".format(gpr.noise))

variance = 1.216914415359497
lengthscale = 0.7207109332084656
noise = 0.21862062811851501


### Using NUTS

In [None]:
# Model is GP model from pyro
W = 100 # Number of warmup steps
C = 1 # Number of chains
S = 500 # Number of samples used in prediction

model = None # Should be GP model
nuts_kernel = pyro.infer.NUTS(model, jit_compile=True)
mcmc = pyro.infer.MCMC(nuts_kernel, S, num_chains=C, warmup_steps=W)
mcmc.run(X, Y)

#### Checking quality of samples using arviz

In [None]:
posterior_samples = mcmc.get_samples()
data = arviz.from_pyro(mcmc)
summary = arviz.summary(data)
print(summary)
arviz.plot_trace(data)
plt.show()
# Maybe use this: arviz.rcParams['plot.max_subplots'] = 18
arviz.plot_posterior(data, var_names=['w3', 'b3']) # TODO: Change var names