In [1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt

# Use single precision for all newly created floating-point tensors.
torch.set_default_dtype(torch.float32)
# Restrict CUDA to physical GPU 1. It must be set before CUDA is first initialized;
# the visible device is then addressed as 'cuda:0' by the rest of the notebook.
%set_env CUDA_VISIBLE_DEVICES=1

# Render matplotlib figures inline and reload edited modules automatically before each cell runs.
%matplotlib inline
%load_ext autoreload
%autoreload 2

env: CUDA_VISIBLE_DEVICES=1


In [204]:
import os
import urllib.request
from scipy.io import loadmat

dataset = 'yacht'

# Directory that holds the `<dataset>.mat` files. It can be overridden with the
# UCI_DATA_DIR environment variable so the notebook is not tied to one user's
# home directory; the default keeps the original location.
data_dir = os.environ.get('UCI_DATA_DIR', '/home/jake.gardner/data')

# The .mat file stores the full table under the 'data' key.
data = torch.Tensor(loadmat(os.path.join(data_dir, f'{dataset}.mat'))['data'])

# Fraction of rows (counted from the top of the file) to keep; 1.0 keeps all of them.
subsample = 1.0
data = data[:int(subsample * data.shape[0]), :]

In [205]:
import numpy as np


# Split in file order (no shuffling): the first 90% of rows train, the rest test.
# The last column is the regression target; every other column is a feature.
N = data.shape[0]
n_train = int(0.9 * N)
train_x, train_y = data[:n_train, :-1], data[:n_train, -1]
test_x, test_y = data[n_train:, :-1], data[n_train:, -1]

# Standardize the features with statistics computed on the training rows only.
# The small constant keeps constant columns from causing a division by zero.
mean = train_x.mean(dim=0, keepdim=True)
std = train_x.std(dim=0, keepdim=True) + 1e-6
train_x, test_x = ((x - mean) / std for x in (train_x, test_x))

# Labels are left on their original scale (label normalization is disabled).

# Make every tensor contiguous and move it to the GPU.
output_device = torch.device('cuda:0')
train_x, train_y, test_x, test_y = (
    t.contiguous().to(output_device) for t in (train_x, train_y, test_x, test_y)
)

print(train_x.shape, test_x.shape)

torch.Size([277, 6]) torch.Size([31, 6])


In [206]:
from gpytorch.kernels.spectral_autoregressive_flow_kernel import (
    RFNSSpectralDeltaKernel,
)

from gpytorch.kernels import MaternKernel, ScaleKernel, ProductStructureKernel
from gpytorch.kernels.keops import MaternKernel as KMaternKernel

class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP with a zero mean and a scaled RFNS spectral-delta kernel."""

    def __init__(self, train_x, train_y, likelihood):
        """Build the mean and covariance modules.

        The spectral kernel is sized from the last dimension of ``train_x``
        and its ``initialize_from_data`` is called with the training data
        before it is wrapped in a ``ScaleKernel``.
        """
        super().__init__(train_x, train_y, likelihood)
        input_dim = train_x.size(-1)

        self.mean_module = gpytorch.means.ZeroMean()

        spectral_kernel = RFNSSpectralDeltaKernel(
            num_dims=input_dim,
            num_deltas=1024,
            ard_num_dims=input_dim,
        )
        spectral_kernel.initialize_from_data(train_x, train_y)
        self.covar_module = ScaleKernel(spectral_kernel)

    def forward(self, x):
        """Return the multivariate normal given by the mean and covariance modules at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

# Initialize the likelihood and the model on the same device the data was moved to
# (`output_device`) rather than on whatever the implicit current CUDA device is.
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=gpytorch.constraints.Positive()
).to(output_device)
model = ExactGPModel(train_x, train_y, likelihood).to(output_device)
# Optional noise prior, currently disabled:
# likelihood.register_prior("noise_prior", gpytorch.priors.HorseshoePrior(1.), "noise")

print(model)

ExactGPModel(
  (likelihood): GaussianLikelihood(
    (noise_covar): HomoskedasticNoise(
      (raw_noise_constraint): Positive()
    )
  )
  (mean_module): ZeroMean()
  (covar_module): ScaleKernel(
    (base_kernel): RFNSSpectralDeltaKernel(
      (raw_lengthscale_constraint): Positive()
      (raw_Z_constraint): Positive()
    )
    (raw_outputscale_constraint): Positive()
  )
)


In [207]:
# Quick sanity check: display the shape of the training inputs.
train_x.shape

torch.Size([277, 6])

In [208]:
# Put both modules in training mode before optimizing the hyperparameters.
model.train()
likelihood.train()

# Adam over all model parameters (these include the GaussianLikelihood parameters).
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Cut the learning rate by a factor of 10 once 500 iterations have been run.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[500], gamma=0.1)

# The objective is the exact marginal log likelihood; its negative is minimized.
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

training_iter = 750
with gpytorch.settings.max_cholesky_size(5000):
    with gpytorch.settings.cg_tolerance(0.001):
        for i in range(training_iter):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            # Report the loss of the current parameters every 50 iterations.
            if i % 50 == 0:
                print(f'Iter {i}/{training_iter} - Loss: {loss.item():.2f}')
            loss.backward()
            optimizer.step()
            scheduler.step()

Iter 0/750 - Loss: 2.82
Iter 50/750 - Loss: 1.32
Iter 100/750 - Loss: 1.07
Iter 150/750 - Loss: 0.91
Iter 200/750 - Loss: 0.73
Iter 250/750 - Loss: 0.53
Iter 300/750 - Loss: 0.34
Iter 350/750 - Loss: 0.14
Iter 400/750 - Loss: -0.05
Iter 450/750 - Loss: -0.23
Iter 500/750 - Loss: -0.37
Iter 550/750 - Loss: -0.39
Iter 600/750 - Loss: -0.41
Iter 650/750 - Loss: -0.42
Iter 700/750 - Loss: -0.43


In [209]:
# Continue fitting by handing the same marginal log likelihood to BoTorch's fitting routine.
# NOTE(review): presumably this runs BoTorch's default (L-BFGS-style) optimizer and may leave
# the modules in eval mode afterwards — confirm against the installed BoTorch version.
from botorch.fit import fit_gpytorch_model
with gpytorch.settings.max_cholesky_size(5000):
    fit_gpytorch_model(mll)

## Make predictions with the model

In the next cell, we make predictions with the model. To do this, we simply put the model and likelihood in eval mode, and call both modules on the test data.

Just as a user-defined GP model returns a `MultivariateNormal` containing the prior mean and covariance from `forward`, a trained GP model in eval mode returns a `MultivariateNormal` containing the posterior mean and covariance. Thus, getting the predictive mean and variance, and then sampling functions from the GP at the given test points, could be accomplished with calls like:

```python
f_preds = model(test_x)
y_preds = likelihood(model(test_x))

f_mean = f_preds.mean
f_var = f_preds.variance
f_covar = f_preds.covariance_matrix
f_samples = f_preds.sample(sample_shape=torch.Size((1000,)))
```

The `gpytorch.settings.fast_pred_var` context is not needed, but here we are giving a preview of using one of our cool features, getting faster predictive distributions using [LOVE](https://arxiv.org/abs/1803.06058).

In [210]:
# Re-evaluate the training objective (negative marginal log likelihood) after fitting.
model.train()

# This is a diagnostic only: no gradients are needed, so autograd is disabled to avoid
# building and retaining a computation graph on the GPU.
with torch.no_grad(), \
        gpytorch.settings.max_cholesky_size(5000), \
        gpytorch.settings.skip_posterior_variances(), \
        gpytorch.settings.fast_computations(False):
    output = model(train_x)
    loss = -mll(output, train_y)
    # .item() turns the 0-dim tensor into a Python float, matching the training-loop logging.
    print(f'{loss.item():.3f}')

-0.829


In [211]:
# Release cached, currently unused GPU memory held by PyTorch's allocator before predicting.
torch.cuda.empty_cache()

In [212]:
# Switch to evaluation mode so the model returns the predictive posterior.
model.eval()
likelihood.eval()

# Predict on the held-out test inputs and pass the result through the likelihood.
# Posterior variances are skipped, so only the predictive mean is used downstream.
with torch.no_grad(), \
        gpytorch.settings.fast_pred_var(), \
        gpytorch.settings.skip_posterior_variances(), \
        gpytorch.settings.max_cholesky_size(5000):
    observed_pred = likelihood(model(test_x))

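## Evaluate the model fit

In the next cell, we compute the root-mean-square error (RMSE) between the predictive mean and the held-out test targets. The `confidence_region` method, a helper that returns 2 standard deviations above and below the mean, is not used here because the predictions above were made with posterior variances skipped.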

In [213]:
# Root-mean-square error of the predictive mean on the test set.
(observed_pred.mean - test_y).pow(2).mean().sqrt()

tensor(0.0740, device='cuda:0')

In [202]:
# Switch to evaluation mode so the model returns the predictive posterior.
model.eval()
likelihood.eval()

# Predict on the training inputs themselves and pass the result through the likelihood.
with torch.no_grad(), \
        gpytorch.settings.fast_pred_var(), \
        gpytorch.settings.max_cholesky_size(5000):
    observed_pred = likelihood(model(train_x))



In [203]:
# Root-mean-square error of the predictive mean on the training set.
(observed_pred.mean - train_y).pow(2).mean().sqrt()

tensor(0.0002, device='cuda:0')

In [41]:
# NOTE(review): `K` is not defined in any visible cell of this notebook, so this looks like a
# leftover debugging cell that would fail on Restart & Run All — confirm and remove.
K

<gpytorch.lazy.non_lazy_tensor.NonLazyTensor at 0x7f8370d57ba8>

NameError: name 'Z' is not defined