In [1]:
import math
import torch
import gpytorch
import pyro
from matplotlib import pyplot as plt

# Make plots inline
%matplotlib inline

In [2]:
import urllib.request
import os.path
from scipy.io import loadmat
from math import floor

# Download the dataset once; later runs reuse the local copy.
if not os.path.isfile('song.mat'):
    print('Downloading \'song\' UCI dataset...')
    # Fetch into a scratch file and rename it only after the transfer has
    # finished. Writing straight to 'song.mat' would leave a truncated file
    # behind if the download were interrupted, and the isfile() guard above
    # would then skip the download on every later run.
    partial_download = 'song.mat.part'
    urllib.request.urlretrieve('https://www.dropbox.com/s/mg91x4c0muatanp/song.mat?dl=1', partial_download)
    os.replace(partial_download, 'song.mat')
    
# The 'data' matrix holds one example per row: every column but the last is
# used as an input feature, the last column is used as the regression target.
data = torch.Tensor(loadmat('song.mat')['data'])
X = data[:, :-1]

# Rescale each feature column to [-1, 1]: shift so the column minimum is 0,
# divide by the column range, then map [0, 1] onto [-1, 1].
X = X - X.min(0)[0]
feature_range = X.max(0)[0]
# A constant column has a range of 0; dividing by it would give 0 / 0 = NaN.
# Treat such a range as 1 so the column maps to a constant -1 instead.
feature_range = feature_range.masked_fill(feature_range == 0, 1.0)
X = 2 * (X / feature_range) - 1
y = data[:, -1]

# Split by position: the leading 80% of the rows form the training set and the
# remaining 20% form the test set. Each piece is made contiguous and moved to
# the GPU.
train_n = int(floor(0.8 * len(X)))

train_x, train_y = (part[:train_n].contiguous().cuda() for part in (X, y))
test_x, test_y = (part[train_n:].contiguous().cuda() for part in (X, y))

In [3]:
from torch.utils.data import TensorDataset, DataLoader
# Pair every training input with its target, then serve the pairs in shuffled
# minibatches of 1024 examples.
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=1024)

In [4]:
# Number of input features per example (size of the last axis of train_x).
data_dim = train_x.shape[-1]

class LargeFeatureExtractor(torch.nn.Sequential):
    """Fully connected network mapping ``data_dim`` inputs to 2 output features.

    Layer widths are data_dim -> 1000 -> 500 -> 50 -> 2, with a ReLU after every
    linear layer except the last. ``data_dim`` is read from module scope when
    the network is constructed.
    """

    def __init__(self):
        super().__init__()
        # (name, module) pairs, registered in the order listed.
        named_layers = (
            ('linear1', torch.nn.Linear(data_dim, 1000)),
            ('relu1', torch.nn.ReLU()),
            ('linear2', torch.nn.Linear(1000, 500)),
            ('relu2', torch.nn.ReLU()),
            ('linear3', torch.nn.Linear(500, 50)),
            ('relu3', torch.nn.ReLU()),
            ('linear4', torch.nn.Linear(50, 2)),
        )
        for layer_name, layer in named_layers:
            self.add_module(layer_name, layer)
                                                             
# Build the network first, then move its parameters to the GPU.
feature_extractor = LargeFeatureExtractor()
feature_extractor = feature_extractor.cuda()
# num_features is the width of the network's final layer (linear4), i.e. 2.
num_features = 2

In [5]:
from gpytorch.models import PyroVariationalGP
from gpytorch.variational import VariationalDistribution, GridInterpolationVariationalStrategy
class PyroSVDKLGridInterpModel(PyroVariationalGP):
    """Pyro-compatible variational GP whose inducing points lie on a regular grid.

    Args:
        likelihood: Likelihood passed through to ``PyroVariationalGP``.
        grid_size: Number of grid points along each dimension.
        grid_bounds: One ``(lower, upper)`` pair per input dimension of the GP.
        name_prefix: Prefix passed through to ``PyroVariationalGP``.
    """

    def __init__(self, likelihood, grid_size=20, grid_bounds=[(-1, 1), (-1, 1)], name_prefix="svdkl_grid_example"):
        # Work on a private copy so the shared default list is never handed to
        # (and possibly retained by) the variational strategy.
        grid_bounds = list(grid_bounds)
        # The grid has grid_size points along each bounded dimension, so the
        # variational distribution needs grid_size ** num_dims inducing values.
        # The previous hard-coded 20 * 20 only matched the default arguments.
        num_inducing_points = grid_size ** len(grid_bounds)
        variational_distribution = VariationalDistribution(num_inducing_points=num_inducing_points)
        variational_strategy = GridInterpolationVariationalStrategy(
            self,
            grid_size=grid_size,
            grid_bounds=grid_bounds,
            variational_distribution=variational_distribution,
        )
        super(PyroSVDKLGridInterpModel, self).__init__(variational_strategy, likelihood, name_prefix=name_prefix)
        self.mean_module = gpytorch.means.ConstantMean()
        # Scaled RBF kernel with a smoothed-box prior on the log lengthscale.
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(
            log_lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(0.001, 1., sigma=0.1, log_transform=True)
        ))

    def forward(self, x):
        """Return the GP prior at ``x`` as a ``MultivariateNormal``.

        The mean comes from ``mean_module`` and the covariance from
        ``covar_module``, both evaluated on ``x``.
        """
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [6]:
class DKLModel(gpytorch.Module):
    """Deep kernel learning model: a feature extractor followed by a grid-based GP.

    Args:
        likelihood: Likelihood handed to the GP layer.
        feature_extractor: Module mapping raw inputs to ``num_features`` features.
        num_features: Number of features the extractor emits; also the number
            of input dimensions of the GP layer.
        grid_bounds: ``(lower, upper)`` interval the extracted features are
            rescaled to, used for every GP input dimension.
    """

    def __init__(self, likelihood, feature_extractor, num_features, grid_bounds=(-1., 1.)):
        super(DKLModel, self).__init__()
        self.feature_extractor = feature_extractor
        # Build the GP grid from the same bounds and dimensionality that
        # features() rescales to. Previously the GP layer always used its own
        # default bounds, so a non-default grid_bounds or num_features left the
        # grid and the rescaled features out of agreement.
        self.gp_layer = PyroSVDKLGridInterpModel(
            likelihood,
            grid_bounds=[tuple(grid_bounds)] * num_features,
        )
        self.grid_bounds = grid_bounds
        self.num_features = num_features

    def features(self, x):
        """Run ``x`` through the feature extractor and rescale the result to ``grid_bounds``."""
        features = self.feature_extractor(x)
        features = gpytorch.utils.grid.scale_to_bounds(features, self.grid_bounds[0], self.grid_bounds[1])
        return features

    def forward(self, x):
        """Return the GP layer's output for the rescaled features of ``x``."""
        res = self.gp_layer(self.features(x))
        return res

    def guide(self, x, y):
        """Pyro guide: delegate to the GP layer's guide on the rescaled features."""
        self.gp_layer.guide(self.features(x), y)

    def model(self, x, y):
        """Pyro model: register the feature extractor with Pyro, then delegate to the GP layer's model."""
        # Registered under "<name_prefix>.feature_extractor".
        pyro.module(self.gp_layer.name_prefix + ".feature_extractor", self.feature_extractor)
        self.gp_layer.model(self.features(x), y)

# Gaussian likelihood, moved to the GPU.
likelihood = gpytorch.likelihoods.GaussianLikelihood()
likelihood = likelihood.cuda()

# Full model: feature extractor plus GP layer, moved to the GPU.
model = DKLModel(likelihood, feature_extractor, num_features=num_features)
model = model.cuda()

In [7]:
from pyro import optim
from pyro import infer

# Pyro's Adam wrapper takes its settings as a dict.
adam_settings = {"lr": 0.1}
optimizer = optim.Adam(adam_settings)

# ELBO estimated with 20 particles, evaluated in vectorized form.
elbo = infer.Trace_ELBO(vectorize_particles=True, num_particles=20)

# Stochastic variational inference over the DKL model/guide pair.
svi = infer.SVI(model.model, model.guide, optimizer, elbo)

In [8]:
num_epochs = 1
num_batches = len(train_loader)
progress_template = 'Epoch {} [{} / {}]'

for i in range(num_epochs):
    # Each epoch makes one pass over every minibatch; each svi.step call
    # returns the loss that is printed alongside the progress counter.
    for minibatch_i, batch in enumerate(train_loader):
        x_batch, y_batch = batch
        loss = svi.step(x_batch, y_batch)
        print(progress_template.format(i, minibatch_i, num_batches), loss)

loss 2336019.675583496
loss 4158620.735316162
loss 4067251.380131836
loss 2096783.6699572755
loss 1210297.9459643555
loss 942903.0973748779
loss 574982.4999612427
loss 506037.5636941528
loss 402567.0265090942
loss 301674.42257751466
loss 274601.9205041504
loss 241471.38258026124
loss 237175.26527923584
loss 209330.1889819336
loss 172374.87983551025
loss 179163.2836514282
loss 159603.58023773192
loss 142606.30208221436
loss 121732.56744659424
loss 113145.68934265137
loss 105781.6995727539
loss 100264.59629302978
loss 91100.60944732666
loss 85801.01454498291
loss 77707.99836334228
loss 70148.3449899292
loss 68224.59989624024
loss 63423.49473144531
loss 59349.29501953125
loss 54972.7710055542
loss 52792.60175476074
loss 49289.64760681152
loss 45982.17471954346
loss 42778.187420349124
loss 40101.62431213379
loss 39797.46937896728
loss 35628.33073272705
loss 33586.76700012207
loss 33186.84608520508
loss 30147.970809020997
loss 28411.82062713623
loss 27577.87855895996
loss 25600.490164489747

loss 414.0151388549805
loss 407.7424078369141
loss 413.65128021240236
loss 407.16767517089846
loss 406.84657897949216
loss 415.9659326171875
loss 384.5176733398437
loss 402.68549713134763
loss 401.52425354003907
loss 390.20977294921875
loss 401.2391409301758
loss 355.9790972900391
loss 374.8517300415039
loss 383.7428646850586
loss 396.0201455688477
loss 390.69847991943357
loss 422.64212005615235
loss 384.98068267822265
loss 385.9450004577637
loss 413.06124298095705
loss 396.34627548217776
loss 387.88541091918944
loss 393.20414276123046
loss 379.7134860229492
loss 397.4904901123047
loss 371.2942074584961
loss 398.20728881835936
loss 361.9511828613281
loss 379.1823669433594
loss 397.4315545654297
loss 379.1300286865234
loss 362.81505096435546
loss 374.9832525634766
loss 369.4448498535156
loss 352.765442199707
loss 377.3762457275391
loss 381.14376159667967
loss 388.97028106689453
loss 353.4455297851562
loss 361.4295022583008
loss 385.1436657714844
loss 352.46007385253904
loss 373.10995758

In [9]:
# Put both the model and the likelihood into evaluation mode.
model.eval()
likelihood.eval()

# Predict on the whole test set with gradient tracking off and GPyTorch's
# Toeplitz setting disabled.
with torch.no_grad():
    with gpytorch.settings.use_toeplitz(False):
        preds = model(test_x)

In [10]:
# Mean absolute error between the predictive mean and the test targets.
test_mae = (preds.mean - test_y).abs().mean()
print('Test MAE: {}'.format(test_mae))

Test MAE: 0.5311331152915955
