In [1]:
from gpytorch.models.pyro_deep_gp import AbstractPyroHiddenGPLayer, AbstractPyroDeepGP
from gpytorch.variational import VariationalStrategy, CholeskyVariationalDistribution
from gpytorch.kernels import MaternKernel, ScaleKernel, RBFKernel
from gpytorch.means import ConstantMean
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import GaussianLikelihood

import torch

In [2]:
NUM_INDUCING = 128


class ToyHiddenGPLayer(AbstractPyroHiddenGPLayer):
    def __init__(self, input_dims, output_dims, name=""):
        inducing_points = torch.randn(output_dims, NUM_INDUCING, input_dims)
        
        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=inducing_points.size(-2),
            batch_size=output_dims
        )
        
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True
        )
        
        super().__init__(variational_strategy, input_dims, output_dims, name)
        
        batch_shape = torch.Size([output_dims])
        
        self.mean_module = ConstantMean(batch_shape=batch_shape)
        self.covar_module = ScaleKernel(
            MaternKernel(nu=2.5, batch_shape=batch_shape, ard_num_dims=input_dims), 
            batch_shape=batch_shape
        )
        
    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)        

In [3]:
#TODO: Double inheritance
class ToyDeepGP(AbstractPyroDeepGP):
    def __init__(self, input_dims, output_dims, total_num_data, hidden_gp_layers, likelihood, name=""):
        inducing_points = torch.randn(output_dims, NUM_INDUCING, input_dims)
        
        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=inducing_points.size(-2),
            batch_size=output_dims
        )
        
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True
        )
        
        super().__init__(
            variational_strategy,
            input_dims,
            output_dims,
            total_num_data,
            hidden_gp_layers,
            likelihood,
            name
        )
        
        batch_shape = torch.Size([output_dims])
        
        self.mean_module = ConstantMean(batch_shape=batch_shape)
        self.covar_module = ScaleKernel(
            MaternKernel(nu=2.5, batch_shape=batch_shape, ard_num_dims=input_dims), 
            batch_shape=batch_shape
        )
    

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return MultivariateNormal(mean_x, covar_x)

In [4]:
import urllib.request
import os.path
from scipy.io import loadmat
from math import floor
import numpy as np

dataset_name = 'elevators'

data = torch.Tensor(loadmat(f'/home/jake.gardner/data/{dataset_name}.mat')['data'])
X = data[:, :-1]
y = data[:, -1]

N = data.shape[0]
np.random.seed(0)
data = data[np.random.permutation(np.arange(N)),:]

train_n = int(floor(0.8*len(X)))

train_x = X[:train_n, :].contiguous().cuda()
train_y = y[:train_n].contiguous().cuda()

test_x = X[train_n:, :].contiguous().cuda()
test_y = y[train_n:].contiguous().cuda()

mean = train_x.mean(dim=-2, keepdim=True)
std = train_x.std(dim=-2, keepdim=True) + 1e-6
train_x = (train_x - mean) / std
test_x = (test_x - mean) / std

mean,std = train_y.mean(),train_y.std()
train_y = (train_y - mean) / std
test_y = (test_y - mean) / std

In [5]:
print(f'Training data size: {train_x.size(-2)}, test data set: {test_x.size(-2)}, some float: {std:.3f}')

Training data size: 13279, test data set: 3320, some float: 0.253


In [6]:
likelihood = GaussianLikelihood()

hidden_gp = ToyHiddenGPLayer(train_x.size(-1), 10, name="layer1").cuda()
deep_gp = ToyDeepGP(10, 1, train_x.size(-2), [hidden_gp], likelihood, name="output_layer").cuda()

In [7]:
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)

In [12]:
from pyro.infer import SVI, TraceMeanField_ELBO, Trace_ELBO

from pyro import optim

optimizer = optim.Adam({"lr": 0.01})


elbo = Trace_ELBO(num_particles=32, vectorize_particles=True, max_plate_nesting=1)
svi = SVI(deep_gp.model, deep_gp.guide, optimizer, elbo)

for epoch_i in range(50):
    for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):
        loss = svi.step(x_batch, y_batch)
        if minibatch_i % 13 == 0:
            print(f'Epoch {epoch_i}, [{minibatch_i} / {len(train_loader)}], loss = {loss:.3f}')

Epoch 0, [0 / 13], loss = 18878.777
Epoch 1, [0 / 13], loss = 19898.306
Epoch 2, [0 / 13], loss = 18890.442
Epoch 3, [0 / 13], loss = 19070.407
Epoch 4, [0 / 13], loss = 19208.500
Epoch 5, [0 / 13], loss = 19174.727
Epoch 6, [0 / 13], loss = 19726.260
Epoch 7, [0 / 13], loss = 19314.693
Epoch 8, [0 / 13], loss = 19310.157
Epoch 9, [0 / 13], loss = 18794.162
Epoch 10, [0 / 13], loss = 19389.263
Epoch 11, [0 / 13], loss = 19257.020
Epoch 12, [0 / 13], loss = 19171.615
Epoch 13, [0 / 13], loss = 19786.294
Epoch 14, [0 / 13], loss = 18354.105
Epoch 15, [0 / 13], loss = 18822.774
Epoch 16, [0 / 13], loss = 18309.904
Epoch 17, [0 / 13], loss = 18725.529
Epoch 18, [0 / 13], loss = 19306.708
Epoch 19, [0 / 13], loss = 18779.928
Epoch 20, [0 / 13], loss = 19381.310
Epoch 21, [0 / 13], loss = 19029.359
Epoch 22, [0 / 13], loss = 18623.788
Epoch 23, [0 / 13], loss = 18909.865
Epoch 24, [0 / 13], loss = 18826.594
Epoch 25, [0 / 13], loss = 18949.892
Epoch 26, [0 / 13], loss = 18494.295
Epoch 27, [

KeyboardInterrupt: 

In [11]:
18923.865/train_x.size(-2)

1.425097145869418