In [None]:
import os
import sys

import gpytorch as gp
import numpy as np
import torch

# Put the parent directory at the front of sys.path (once) so that packages
# living one level above this notebook can be imported.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# os.environ['CUDA_VISIBLE_DEVICES'] = "7"
# `device` stays None on CPU-only machines; later cells pass it through as-is.
device = 'cuda:7' if torch.cuda.is_available() else None

# torch.cuda.set_device rejects None, so only select a CUDA device when one
# was actually chosen above.
if device is not None:
    torch.cuda.set_device(device)

In [None]:
from bi_gp.bilateral_kernel import BilateralKernel, MaternLattice, RBFLattice

class BilateralGPModel(gp.models.ExactGP):
    """Exact GP with a constant mean and a scaled lattice kernel.

    The base kernel is a ``MaternLattice`` when ``nu`` is given and an
    ``RBFLattice`` otherwise; either way it is wrapped in a ``ScaleKernel``.
    The Gaussian likelihood's noise is constrained to be greater than
    ``min_noise``.

    Args:
        train_x: Training inputs handed to ``ExactGP``.
        train_y: Training targets handed to ``ExactGP``.
        nu: Forwarded to ``MaternLattice``; ``None`` selects ``RBFLattice``.
        order: Forwarded to the lattice kernel.
        min_noise: Lower bound for the likelihood noise constraint.
    """

    def __init__(self, train_x, train_y, nu=None, order=1, min_noise=1e-4):
        noise_floor = gp.constraints.GreaterThan(min_noise)
        super().__init__(
            train_x,
            train_y,
            gp.likelihoods.GaussianLikelihood(noise_constraint=noise_floor),
        )
        self.mean_module = gp.means.ConstantMean()

        # Pick the lattice kernel family based on whether `nu` was supplied.
        if nu is None:
            self.base_covar_module = RBFLattice(order=order)
        else:
            self.base_covar_module = MaternLattice(nu=nu, order=order)
        self.covar_module = gp.kernels.ScaleKernel(self.base_covar_module)

    def forward(self, x):
        """Return the multivariate normal built from the mean and covariance at ``x``."""
        return gp.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [None]:
from experiments.utils import prepare_dataset

# Build the iterator for the 'precipitation3d_all' dataset, passing the device
# selected above, and pull its first item. Only the second and third elements
# are kept (as train_x / train_y); the first element is discarded.
# NOTE(review): the layout of each yielded item is defined in experiments.utils — confirm there.
data_iter = prepare_dataset('precipitation3d_all', uci_data_dir=None, device=device)
_, train_x, train_y = next(data_iter)

## Autograd

This currently uses the approximation to the gradient, defined as another collection of filtering operations.

In [None]:
# Evaluate the negative marginal log-likelihood once, under the solver
# settings below, and back-propagate so every parameter receives a .grad.
with gp.settings.cg_tolerance(1.0):
    with gp.settings.max_preconditioner_size(50):
        with gp.settings.max_root_decomposition_size(100):
            model = BilateralGPModel(train_x, train_y, nu=1.5, order=1).to(device)
            model.base_covar_module.lengthscale = 1.0
            mll = gp.mlls.ExactMarginalLogLikelihood(model.likelihood, model)

            output = model(train_x)
            loss = -mll(output, train_y)
            print(f'Loss: {loss.item()}')

            loss.backward()

# Show each parameter next to the gradient autograd stored on it.
for idx, p in enumerate(model.parameters()):
    print(f'[{idx}] {p} ---> {p.grad}')

## Finite Difference

In [None]:
ell = torch.tensor(1.0).requires_grad_(True).to(device)
torch.autograd.gradcheck(f, ell, eps=1e-3, rtol=5e-4, atol=1e-5, nondet_tol=1e-3)

In [None]:
def f(ell):
    model = BilateralGPModel(train_x, train_y, nu=1.5, order=1).to(device)
    model.base_covar_module.lengthscale = ell

    # for i, p in enumerate(model.parameters()):
    #     if p_idx == i:
    #         print(f'[{p_idx}] {p} + {eps}')
    #         ## Assumes no ARD, scalar params
    #         p += eps

    mll = gp.mlls.ExactMarginalLogLikelihood(model.likelihood, model)
    loss = -mll(model(train_x), train_y)

    print(f'Lengthscale: {model.base_covar_module.lengthscale}; Loss: {loss}')
    return loss

In [None]:
# Central finite-difference estimate of d(loss)/d(lengthscale) at ell = 1.0,
# repeated five times so the spread between evaluations can be reported.
with torch.no_grad():
    ell = 1.0
    eps = 1e-2
    grads = []

    for _ in range(5):
        loss_plus = f(ell + eps)
        loss_minus = f(ell - eps)
        grad = (loss_plus - loss_minus) / (2. * eps)
        print(f'Finite Diff: {grad}')
        grads.append(grad.item())

# Mean of the estimates with a two-standard-deviation band.
print(f'{np.mean(grads)} +/- {2 * np.std(grads)}')