In [None]:
# © Crown Copyright GCHQ
#
# Licensed under the GNU General Public License, version 3 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.gnu.org/licenses/gpl-3.0.en.html
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
# sphinx ignore

import sys

sys.path.append("../..")

%load_ext autoreload
%autoreload 2

%config Completer.use_jedi = False

In [None]:
# Seed for the NumPy generators handed to the synthetic dataset and the Vanguard controller.
random_seed = 1_989

In [None]:
import gpytorch
import matplotlib.pyplot as plt
import numpy as np
import torch
from numpy.typing import NDArray
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from torch import Tensor
from typing_extensions import override

from vanguard.datasets.synthetic import SyntheticDataset, very_complicated_f
from vanguard.kernels import ScaledRBFKernel
from vanguard.vanilla import GaussianGPController

In [None]:
# Synthetic dataset built from `very_complicated_f` with 10 training points, seeded with `random_seed`.
DATASET = SyntheticDataset(functions=[very_complicated_f], n_train_points=10, rng=np.random.default_rng(random_seed))

In [None]:
# scikit-learn kernel: a constant kernel multiplied by an RBF kernel.
kernel = ConstantKernel() * RBF()

In [None]:
# scikit-learn adds `alpha` to the diagonal of the kernel matrix, i.e. it is interpreted as a noise
# *variance*. `train_y_std` is a standard deviation, so it is squared before being passed in.
gp = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=9,
    alpha=DATASET.train_y_std.detach().cpu().numpy() ** 2,
)

In [None]:
# Fit the scikit-learn model on the training tensors converted to NumPy arrays.
gp.fit(DATASET.train_x.detach().cpu().numpy(), DATASET.train_y.detach().cpu().numpy())

In [None]:
# 100 evenly spaced inputs spanning the range of the training inputs.
linspace = np.linspace(
    start=DATASET.train_x.min().item(),
    stop=DATASET.train_x.max().item(),
    num=100,
)
# The regressor is given the grid as a single-feature column, hence the extra trailing axis.
predictions, uncertainty = gp.predict(linspace[:, np.newaxis], return_std=True)

In [None]:
# NumPy versions of the training data; these are reused by the later plotting cells.
plt_train_x, plt_train_y = (tensor.numpy(force=True) for tensor in (DATASET.train_x, DATASET.train_y))

plt.figure(figsize=(20, 10))
plt.scatter(x=plt_train_x, y=plt_train_y, label="Truth")
plt.plot(linspace, predictions, color="olive", label="Prediction")
# Shaded band of +/- 1.96 predictive standard deviations around the prediction.
plt.fill_between(
    x=linspace,
    y1=predictions - 1.96 * uncertainty,
    y2=predictions + 1.96 * uncertainty,
    color="olive",
    alpha=0.3,
)
plt.title(label="Sci-kit Learn Gaussian Process")
plt.legend()
plt.show()

In [None]:
# GPyTorch kernel: an RBF kernel wrapped in a ScaleKernel.
# NOTE(review): this rebinds `kernel` (previously the scikit-learn kernel) and the new object is not
# referenced again in the visible notebook; `ExactGPModel` builds its own covariance module.
kernel = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

In [None]:
# GPyTorch constant mean function.
# NOTE(review): not referenced again in the visible notebook; `ExactGPModel` builds its own mean module.
mean = gpytorch.means.ConstantMean()

In [None]:
# GPyTorch Gaussian likelihood.
# NOTE(review): not referenced again in the visible notebook; `ExactGPModel` constructs a
# FixedNoiseGaussianLikelihood internally instead.
likelihood = gpytorch.likelihoods.GaussianLikelihood()

In [None]:
class ExactGPModel(gpytorch.models.ExactGP):
    """
    Exact GP regression model with a constant mean, a scaled RBF kernel and fixed observation noise.
    """

    def __init__(self, train_x: Tensor, train_y: Tensor, y_std: Tensor) -> None:
        """
        Build the model and its fixed-noise Gaussian likelihood.

        :param train_x: Training inputs.
        :param train_y: Training targets.
        :param y_std: Standard deviation of the observation noise on the training targets.
        """
        # FixedNoiseGaussianLikelihood takes the known noise *variance*, so the standard
        # deviation has to be squared before it is handed over.
        likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=y_std**2)
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    @override
    def forward(self, x: Tensor) -> gpytorch.distributions.MultivariateNormal:
        """
        Evaluate the mean and covariance modules at ``x``.

        :param x: Inputs at which to evaluate the GP.
        :return: Multivariate normal built from the mean and covariance evaluated at ``x``.
        """
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [None]:
# Rebind `gp` to the GPyTorch model (it previously held the scikit-learn regressor).
gp = ExactGPModel(
    torch.as_tensor(DATASET.train_x),
    torch.as_tensor(DATASET.train_y),
    # Multiply by a ones vector with one entry per training target.
    torch.ones(len(DATASET.train_y)) * DATASET.train_y_std,
)

In [None]:
# Adam optimiser over the parameters of the current `gp`, and the exact marginal log likelihood for
# that model. `fit` reads both of these as module-level globals, so they are tied to this `gp`.
optimiser = torch.optim.Adam([{"params": gp.parameters()}], lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(gp.likelihood, gp)


def fit(model: ExactGPModel, train_x: Tensor, train_y: Tensor, n_iters: int) -> None:
    """
    Run ``n_iters`` optimiser steps on the negative marginal log likelihood.

    Uses the module-level ``optimiser`` and ``mll`` objects rather than creating its own.

    :param model: Model to put into training mode and evaluate on ``train_x``.
    :param train_x: Training inputs passed through the model on every step.
    :param train_y: Training targets the marginal log likelihood is evaluated against.
    :param n_iters: Number of optimisation steps to take.
    """
    # Put both the model and its likelihood into training mode.
    for module in (model, model.likelihood):
        module.train()

    for _ in range(n_iters):
        optimiser.zero_grad()
        negative_mll = -mll(model(train_x), train_y)
        negative_mll.backward()
        optimiser.step()

In [None]:
# Train the GPyTorch model for 100 optimiser steps on the training data.
fit(gp, torch.as_tensor(DATASET.train_x), torch.as_tensor(DATASET.train_y), n_iters=100)

In [None]:
def predict(model: ExactGPModel, x: Tensor) -> tuple[NDArray[float], NDArray[float]]:
    """
    Evaluate the model and its likelihood at ``x``.

    :param model: Model to switch into evaluation mode and query.
    :param x: Inputs at which to predict.
    :return: Tuple ``(means, standard_deviations)`` as NumPy arrays, where the standard deviations
        are the square roots of the absolute diagonal of the predictive covariance.
    """
    # Put both the model and its likelihood into evaluation mode.
    for module in (model, model.likelihood):
        module.eval()

    with torch.no_grad():
        with gpytorch.settings.fast_pred_var():
            predictive = model.likelihood(model(x))

    predictive_mean = predictive.loc.numpy(force=True)
    predictive_var = predictive.lazy_covariance_matrix.diagonal().numpy(force=True)

    # Taking the absolute value first keeps the square root real for any negative entry.
    predictive_std = np.sqrt(np.abs(predictive_var))
    return predictive_mean, predictive_std

In [None]:
# Predict with the GPyTorch model on the same `linspace` grid; this rebinds `predictions` and `uncertainty`.
predictions, uncertainty = predict(gp, torch.as_tensor(linspace))

In [None]:
plt.figure(figsize=(20, 10))
plt.scatter(x=plt_train_x, y=plt_train_y, label="Truth")
plt.plot(linspace, predictions, color="green", label="Prediction")
# Shaded band of +/- 1.96 predictive standard deviations around the prediction.
plt.fill_between(
    x=linspace,
    y1=predictions - 1.96 * uncertainty,
    y2=predictions + 1.96 * uncertainty,
    color="green",
    alpha=0.3,
)
plt.title(label="GPyTorch Gaussian Process")
plt.legend()
plt.show()

In [None]:
# Rebind `gp` to a Vanguard controller (it previously held the GPyTorch model), using the scaled
# RBF kernel class and the dataset's `train_y_std`, with a generator seeded by `random_seed`.
gp = GaussianGPController(
    DATASET.train_x,
    DATASET.train_y,
    kernel_class=ScaledRBFKernel,
    y_std=DATASET.train_y_std,
    rng=np.random.default_rng(random_seed),
)
# NOTE(review): 100 presumably is the number of training iterations, matching the GPyTorch run — confirm.
gp.fit(100)

In [None]:
# Predictive posterior from the Vanguard controller on the shared `linspace` grid.
posterior = gp.predictive_likelihood(linspace)
# `_tensor_prediction` is a private method; both of its outputs are converted to NumPy arrays.
predictions, covar = (tensor.numpy(force=True) for tensor in posterior._tensor_prediction())
# Per-point standard deviation taken from the diagonal of the covariance.
uncertainty = np.sqrt(np.diagonal(covar))

In [None]:
# Confidence interval from the posterior, converted to NumPy arrays for plotting.
median, lower, upper = (tensor.numpy(force=True) for tensor in posterior.confidence_interval())

plt.figure(figsize=(20, 10))
plt.scatter(x=plt_train_x, y=plt_train_y, label="Truth")
plt.plot(linspace, median, color="red", label="Prediction")
# Shade the region between the lower and upper interval bounds.
plt.fill_between(x=linspace, y1=lower, y2=upper, color="red", alpha=0.3)
plt.title(label="Vanguard Gaussian Process")
plt.legend()
plt.show()