In [175]:
import tqdm
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
import numpy as np

# Make plots inline
%matplotlib inline

In [176]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import MultivariateNormal

In [177]:
import urllib.request
import os
from scipy.io import loadmat
from math import floor
import h5py

filename = 'elevators.mat'

data = loadmat(filename)
data = data['data']

In [178]:
X = data[:, :-1]
X = X - X.min(0)[0]
X = 2 * (X / X.max(0)[0]) - 1
y = data[:, -1]

train_n = int(floor(0.8 * len(X)))
train_x = torch.Tensor(X[:train_n, :])
train_y = torch.Tensor(y[:train_n])

test_x = torch.Tensor(X[train_n:, :])
test_y = torch.Tensor(y[train_n:])

if torch.cuda.is_available():
    train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda()

In [179]:
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

test_dataset = TensorDataset(test_x, test_y)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)

In [180]:
class CustomGPModel(nn.Module):
    def __init__(self, inducing_points, input_dim):
        super(CustomGPModel, self).__init__()
        self.inducing_points = inducing_points
        self.num_inducing = inducing_points.size(0)
        
        self.mean_module = nn.Parameter(torch.zeros(1))
        self.log_scale = nn.Parameter(torch.zeros(1))
        self.log_lengthscale = nn.Parameter(torch.zeros(input_dim))

        self.variational_mean = nn.Parameter(torch.zeros(self.num_inducing))
        self.variational_covar = nn.Parameter(torch.eye(self.num_inducing)) #+ 1e-6 * torch.eye(self.num_inducing))
        
    def kernel(self, x1, x2):
        lengthscale = torch.exp(self.log_lengthscale)
        scale = torch.exp(self.log_scale)
        diff = (x1.unsqueeze(1) - x2.unsqueeze(0)) / lengthscale
        return scale * torch.exp(-0.5 * (diff ** 2).sum(-1))
    
    def forward(self, x):
        Kmm = self.kernel(self.inducing_points, self.inducing_points) #+ 1e-6 * torch.eye(self.num_inducing)
        Kmn = self.kernel(self.inducing_points, x)
        Knn = self.kernel(x, x)
        
        Kmm_inv = torch.inverse(Kmm)
        
        mean = self.mean_module + Kmn.t() @ Kmm_inv @ self.variational_mean
        covar = Knn + Kmn.t() @ Kmm_inv @ (self.variational_covar - Kmm) @ Kmm_inv @ Kmn
        
        jitter = torch.abs(covar.diag().sum()) * torch.eye(covar.size(0)).to(covar)
        covar += 2.0 * jitter

        return MultivariateNormal(mean, covar)

In [181]:
# Generate inducing points
input_dim = train_x.size(1)
inducing_points = train_x[:500, :]

In [182]:
# Instantiate the model
model = CustomGPModel(inducing_points=inducing_points, input_dim=input_dim)
if torch.cuda.is_available():
    model.cuda()

In [183]:
# Optimizers
variational_optimizer = optim.Adam([model.variational_mean, model.variational_covar], lr=0.1)
hyperparameter_optimizer = optim.Adam([model.mean_module, model.log_scale, model.log_lengthscale], lr=0.01)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for x_batch, y_batch in train_loader:
        if torch.cuda.is_available():
            x_batch, y_batch = x_batch.cuda(), y_batch.cuda()
        
        variational_optimizer.zero_grad()
        hyperparameter_optimizer.zero_grad()
        
        output = model(x_batch)
        loss = -output.log_prob(y_batch).mean()
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        variational_optimizer.step()
        hyperparameter_optimizer.step()
        
        epoch_loss += loss.item()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader)}")


ValueError: Expected parameter covariance_matrix (Tensor of shape (2, 2)) of distribution MultivariateNormal(loc: torch.Size([2]), covariance_matrix: torch.Size([2, 2])) to satisfy the constraint PositiveDefinite(), but found invalid values:
tensor([[ 35997.0000,   5088.9976],
        [-20263.0020,  10581.0000]], grad_fn=<ExpandBackward0>)