In [1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt

# Make plots inline
%matplotlib inline

"""
Original documentation:
https://gpytorch.readthedocs.io/en/latest/examples/02_Scalable_Exact_GPs/SGPR_Regression_CUDA.html
SGPR Source code:
https://gpytorch.readthedocs.io/en/latest/_modules/gpytorch/kernels/inducing_point_kernel.html#InducingPointKernel
Inducing and other scalable kernels:
https://gpytorch.readthedocs.io/en/latest/kernels.html#inducingpointkernel
"""

In [2]:
import urllib.request
import os
from scipy.io import loadmat
from math import floor


# this is for running the notebook in our testing framework
smoke_test = ('CI' in os.environ)


if not smoke_test and not os.path.isfile('../elevators.mat'):
    print('Downloading \'elevators\' UCI dataset...')
    urllib.request.urlretrieve('https://drive.google.com/uc?export=download&id=1jhWL3YUHvXIaftia4qeAyDwVxo6j1alk', '../elevators.mat')


if smoke_test:  # this is for running the notebook in our testing framework
    X, y = torch.randn(1000, 3), torch.randn(1000)
else:
    data = torch.Tensor(loadmat('../elevators.mat')['data'])
    X = data[:, :-1]
    X = X - X.min(0)[0]
    X = 2 * (X / X.max(0)[0]) - 1
    y = data[:, -1]


train_n = int(floor(0.8 * len(X)))
train_x = X[:train_n, :].contiguous()
train_y = y[:train_n].contiguous()

test_x = X[train_n:, :].contiguous()
test_y = y[train_n:].contiguous()

if torch.cuda.is_available():
    train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda()

Downloading 'elevators' UCI dataset...


In [3]:
X.size()

torch.Size([16599, 18])

In [4]:
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel, InducingPointKernel
from gpytorch.distributions import MultivariateNormal

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean()
        self.base_covar_module = ScaleKernel(RBFKernel())
        #  This model constructs a base scaled RBF kernel, 
        #  and then simply wraps it in an InducingPointKernel
        self.covar_module = InducingPointKernel(self.base_covar_module, 
                                                inducing_points=train_x[:500, :], 
                                                likelihood=likelihood)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

In [10]:
print('initial inducing point \n', train_x[:500, :])
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(train_x, train_y, likelihood)

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

initial inducing point 
 tensor([[-0.3009, -0.3884, -0.1061,  ...,  0.0915, -1.0000,  0.5880],
        [ 0.4625, -0.0162,  0.4471,  ...,  0.0906, -1.0000,  0.4707],
        [ 0.1282, -0.0148,  0.1624,  ...,  0.0957, -1.0000,  0.1737],
        ...,
        [ 0.7036, -0.5959,  0.2029,  ...,  0.0901, -1.0000,  0.7060],
        [ 0.0261, -0.3266, -0.0317,  ...,  0.0893, -1.0000,  0.7651],
        [-0.1910, -0.3568,  0.2107,  ...,  0.0905, -1.0000,  0.5292]],
       device='cuda:0')


In [9]:
training_iterations = 2 if smoke_test else 50

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def train():
    for i in range(training_iterations):
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(train_x)
        # Calc loss and backprop derivatives
        loss = -mll(output, train_y)
        loss.backward()
        print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
        print('inducing points \n', model.covar_module.inducing_points)
        optimizer.step()
        torch.cuda.empty_cache()

# See dkl_mnist.ipynb for explanation of this flag
%time train()

Iter 1/50 - Loss: 0.024
inducing points 
 Parameter containing:
tensor([[-0.3012, -0.3886, -0.1058,  ...,  0.0911, -1.0000,  0.5881],
        [ 0.4628, -0.0157,  0.4472,  ...,  0.0908, -1.0000,  0.4706],
        [ 0.1282, -0.0152,  0.1629,  ...,  0.0918, -1.0000,  0.1755],
        ...,
        [ 0.7038, -0.5959,  0.2032,  ...,  0.0908, -1.0000,  0.7059],
        [ 0.0264, -0.3265, -0.0324,  ...,  0.0904, -1.0000,  0.7645],
        [-0.1911, -0.3571,  0.2111,  ...,  0.0908, -1.0000,  0.5293]],
       device='cuda:0', requires_grad=True)
Iter 2/50 - Loss: 0.006
inducing points 
 Parameter containing:
tensor([[-0.3012, -0.3886, -0.1058,  ...,  0.0911, -1.0000,  0.5881],
        [ 0.4628, -0.0157,  0.4472,  ...,  0.0908, -1.0000,  0.4706],
        [ 0.1281, -0.0152,  0.1629,  ...,  0.0919, -1.0000,  0.1754],
        ...,
        [ 0.7038, -0.5959,  0.2032,  ...,  0.0908, -1.0000,  0.7059],
        [ 0.0264, -0.3265, -0.0324,  ...,  0.0905, -1.0000,  0.7646],
        [-0.1911, -0.3571,  0.2

Iter 19/50 - Loss: -0.288
inducing points 
 Parameter containing:
tensor([[-0.3011, -0.3886, -0.1059,  ...,  0.0911, -1.0000,  0.5881],
        [ 0.4627, -0.0158,  0.4471,  ...,  0.0907, -1.0000,  0.4706],
        [ 0.1281, -0.0150,  0.1628,  ...,  0.0926, -1.0000,  0.1750],
        ...,
        [ 0.7038, -0.5959,  0.2032,  ...,  0.0907, -1.0000,  0.7059],
        [ 0.0263, -0.3265, -0.0323,  ...,  0.0900, -1.0000,  0.7644],
        [-0.1911, -0.3570,  0.2110,  ...,  0.0907, -1.0000,  0.5293]],
       device='cuda:0', requires_grad=True)
Iter 20/50 - Loss: -0.307
inducing points 
 Parameter containing:
tensor([[-0.3011, -0.3886, -0.1059,  ...,  0.0911, -1.0000,  0.5881],
        [ 0.4627, -0.0158,  0.4471,  ...,  0.0907, -1.0000,  0.4706],
        [ 0.1282, -0.0150,  0.1628,  ...,  0.0927, -1.0000,  0.1749],
        ...,
        [ 0.7038, -0.5959,  0.2032,  ...,  0.0907, -1.0000,  0.7059],
        [ 0.0263, -0.3265, -0.0323,  ...,  0.0901, -1.0000,  0.7644],
        [-0.1911, -0.3570, 

Iter 37/50 - Loss: -0.564
inducing points 
 Parameter containing:
tensor([[-0.3010, -0.3885, -0.1059,  ...,  0.0913, -1.0000,  0.5880],
        [ 0.4626, -0.0159,  0.4471,  ...,  0.0907, -1.0000,  0.4706],
        [ 0.1282, -0.0149,  0.1626,  ...,  0.0942, -1.0000,  0.1745],
        ...,
        [ 0.7037, -0.5959,  0.2031,  ...,  0.0905, -1.0000,  0.7060],
        [ 0.0262, -0.3266, -0.0320,  ...,  0.0898, -1.0000,  0.7658],
        [-0.1910, -0.3569,  0.2108,  ...,  0.0906, -1.0000,  0.5292]],
       device='cuda:0', requires_grad=True)
Iter 38/50 - Loss: -0.577
inducing points 
 Parameter containing:
tensor([[-0.3010, -0.3885, -0.1060,  ...,  0.0913, -1.0000,  0.5880],
        [ 0.4626, -0.0159,  0.4471,  ...,  0.0907, -1.0000,  0.4706],
        [ 0.1282, -0.0148,  0.1626,  ...,  0.0943, -1.0000,  0.1745],
        ...,
        [ 0.7037, -0.5959,  0.2031,  ...,  0.0905, -1.0000,  0.7060],
        [ 0.0262, -0.3266, -0.0320,  ...,  0.0897, -1.0000,  0.7657],
        [-0.1910, -0.3569, 

In [7]:
model.eval()
likelihood.eval()
with gpytorch.settings.max_preconditioner_size(10), torch.no_grad():
    with gpytorch.settings.max_root_decomposition_size(30), gpytorch.settings.fast_pred_var():
        preds = model(test_x)

In [8]:
print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))


Test MAE: 0.07273373752832413
