In [23]:
import tqdm
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from numpy import genfromtxt
import sys
print(sys.executable)
# Make plots inline
%matplotlib inline

/work/flemingc/belal/anaconda3/envs/gpytorch/bin/python3


In [24]:
import urllib.request
import os
from scipy.io import loadmat
from math import floor


data = torch.Tensor(loadmat('elevators.mat')['data'])
X = data[:1000, :-1]
X = X - X.min(0)[0]
X = 2 * (X / X.max(0)[0].clamp_min(1e-6)) - 1
y = data[:1000, -1]
y = y.sub(y.mean()).div(y.std())


train_n = int(floor(0.8 * len(X)))
train_x = X[:train_n, :].contiguous()
train_y = y[:train_n].contiguous()

test_x = X[train_n:, :].contiguous()
test_y = y[train_n:].contiguous()

if torch.cuda.is_available():
    train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda()

In [25]:
from gpytorch.models import ApproximateGP
from gpytorch.variational.nearest_neighbor_variational_strategy import NNVariationalStrategy


class GPModel(ApproximateGP):
    def __init__(self, inducing_points, likelihood, k=256, training_batch_size=256):

        m, d = inducing_points.shape
        self.m = m
        self.k = k

        variational_distribution = gpytorch.variational.MeanFieldVariationalDistribution(m)
         
        if torch.cuda.is_available():
            inducing_points = inducing_points.cuda()

        variational_strategy = NNVariationalStrategy(self, inducing_points, variational_distribution, k=k,
                                                     training_batch_size=training_batch_size)
        
        super(GPModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ZeroMean()
        self.covar_module = gpytorch.kernels.MaternKernel(nu=2.5, ard_num_dims=d)

        self.likelihood = likelihood

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    def __call__(self, x, prior=False, **kwargs):
        if x is not None:
            if x.dim() == 1:
                x = x.unsqueeze(-1)
        return self.variational_strategy(x=x, prior=False, **kwargs)

k = 256
training_batch_size = 64

likelihood = gpytorch.likelihoods.GaussianLikelihood()
# Note: one should use full training set as inducing points!
model = GPModel(inducing_points=train_x, likelihood=likelihood, k=k, training_batch_size=training_batch_size)

if torch.cuda.is_available():
    likelihood = likelihood.cuda()
    model = model.cuda()

In [None]:
num_epochs = 1000
num_batches = model.variational_strategy._total_training_batches


model.train()
likelihood.train()

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Our loss object. We're using the VariationalELBO
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))


epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc="Epoch")
for epoch in epochs_iter:
    minibatch_iter = tqdm.notebook.tqdm(range(num_batches), desc="Minibatch", leave=False, disable=True)

    for i in minibatch_iter:
        optimizer.zero_grad()
        output = model(x=None)
        # Obtain the indices for mini-batch data
        current_training_indices = model.variational_strategy.current_training_indices
        # Obtain the y_batch using indices. It is important to keep the same order of train_x and train_y
        y_batch = train_y[...,current_training_indices]
        if torch.cuda.is_available():
            y_batch = y_batch.cuda()
        loss = -mll(output, y_batch)
        minibatch_iter.set_postfix(loss=loss.item())
        #print('Iter %d/%d - Loss: %.3f  noise: %.3f' % (
        #    i + 1, epochs_iter, loss.item(),
        #    model.likelihood.noise.item()
        #))
        loss.backward()
        optimizer.step()
    if epoch%50==0:
        print(loss.item())    

Epoch:   0%|          | 0/1000 [00:00<?, ?it/s]

5.520802021026611
1.265540361404419
1.8674070835113525
1.3259271383285522
1.3418428897857666
1.1944688558578491
1.0799590349197388
1.2433449029922485
1.1553714275360107
1.1896140575408936
1.1177351474761963
1.4591584205627441
1.1253724098205566
1.3295228481292725
1.2818654775619507
1.420972466468811
1.4664347171783447
1.2495375871658325
1.1933724880218506
1.4867392778396606


In [None]:
from torch.utils.data import TensorDataset, DataLoader


test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [6]:
model.eval()
likelihood.eval()
means = torch.tensor([0.])
test_mse = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        preds = model(x_batch)
        means = torch.cat([means, preds.mean.cpu()])

        diff = torch.pow(preds.mean - y_batch, 2)
        diff = diff.sum(dim=-1) / test_x.size(0) # sum over bsz and scaling
        diff = diff.mean() # average over likelihood_nsamples
        test_mse += diff
means = means[1:]
test_rmse = test_mse.sqrt().item()

In [22]:
from torch.utils.data import TensorDataset, DataLoader


test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [23]:
model.eval()
likelihood.eval()
means = torch.tensor([0.])
test_mse = 0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        preds = model(x_batch)
        means = torch.cat([means, preds.mean.cpu()])

        diff = torch.pow(preds.mean - y_batch, 2)
        diff = diff.sum(dim=-1) / test_x.size(0) # sum over bsz and scaling
        diff = diff.mean() # average over likelihood_nsamples
        test_mse += diff
means = means[1:]
test_rmse = test_mse.sqrt().item()

In [24]:
print(test_rmse)

0.8142833709716797
