In [1]:
%load_ext autoreload
%autoreload 2

import torch
import gpytorch
import pinot
import math


Using backend: pytorch


In [2]:
# Load the ESOL dataset through pinot's built-in loader.
data = pinot.data.esol()
# Partition into training and test portions. The [4, 1] argument is presumably
# a 4:1 train/test ratio -- TODO confirm against pinot.data.utils.split.
# NOTE(review): no random seed is set anywhere in the visible cells; if split()
# shuffles, the partition (and every RMSE below) will differ between runs.
ds_tr, ds_te = pinot.data.utils.split(data, [4, 1])



In [3]:
# Mini-batch size for the batched view of the training set.
batch_size = 32

# "One batch" views: the batch size equals the split size, so each holds the
# whole split as a single batch (indexed as [0] by the evaluation code).
ds_tr_onebatch = pinot.data.utils.batch(ds_tr, len(ds_tr))
ds_te_onebatch = pinot.data.utils.batch(ds_te, len(ds_te))

# Mini-batched training view. Uses `batch_size` rather than a repeated literal
# so that changing the constant above actually takes effect.
ds_tr_batched = pinot.data.utils.batch(ds_tr, batch_size)

# Variational GP

In [6]:
def rmse(y, yhat):
    """Return the root-mean-square error between `y` and `yhat` as a 0-dim tensor.

    Both tensors are flattened before subtracting, so an ``(N, 1)`` target and
    an ``(N,)`` prediction are compared element by element instead of being
    broadcast against each other.
    """
    residual = y.flatten() - yhat.flatten()
    return residual.pow(2).mean().sqrt()

def run_experiment(num_inducing, kernel_name=None, kernel_params=None, mean_name="LinearMean", beta=1.0, device="cpu", n_epochs=50, num_data=None):
    """Train a variational-GP network on the training split and report RMSE.

    Builds a ``pinot.Net`` (graph representation + ``VariationalGP`` output
    regressor), optimises it with Adam on ``ds_tr_onebatch`` for ``n_epochs``
    passes, then evaluates the predictive mean on the first batch of
    ``ds_tr_onebatch`` and ``ds_te_onebatch`` (both notebook-level globals).

    Parameters
    ----------
    num_inducing : int
        Forwarded to ``pinot.Net`` as ``num_inducing_points``.
    kernel_name : str or None
        Name of a class in ``gpytorch.kernels``. ``None`` passes ``covar=None``
        to ``pinot.Net`` (presumably selecting pinot's default kernel --
        TODO confirm).
    kernel_params : dict or None
        Keyword arguments for the kernel constructor. ``None`` is treated as
        "no arguments".
    mean_name : str
        Only echoed in the printed summary. NOTE(review): this function never
        passes it to the model, so changing it does not change the mean
        function -- wire it through or drop it once the pinot API is confirmed.
    beta : float
        Forwarded to ``pinot.Net`` as ``beta``.
    device : str
        Torch device string the model and batches are moved to.
    n_epochs : int
        Number of passes over ``ds_tr_onebatch``.
    num_data : int or None
        Forwarded to ``pinot.Net`` as ``num_data``. When ``None`` it is
        computed as the number of training targets in ``ds_tr_onebatch``
        instead of relying on a hard-coded split size.

    Returns
    -------
    tuple of float
        ``(train_rmse, test_rmse)``; the same values are also printed.
    """
    dev = torch.device(device)

    # A named kernel without a parameter dict means "use that kernel's
    # defaults"; unpacking None with ** would raise a TypeError.
    if kernel_name is None:
        covar = None
    else:
        covar = getattr(gpytorch.kernels, kernel_name)(**(kernel_params or {}))

    # Derive the training-set size from the data actually trained on, so the
    # value stays correct if the dataset or split changes.
    # Assumes targets are laid out with one row per example -- TODO confirm.
    if num_data is None:
        num_data = sum(y.shape[0] for _, y in ds_tr_onebatch)

    net_variational_gp = pinot.Net(
        pinot.representation.Sequential(
            pinot.representation.dgl_legacy.gn(kwargs={"allow_zero_in_degree": True}),
            [64, 'relu', 64, 'relu', 64, 'relu']),
        output_regressor_class=pinot.regressors.VariationalGP,
        num_inducing_points=num_inducing,
        num_data=num_data,
        beta=beta,
        covar=covar,
    ).to(dev)

    # Two parameter groups: the representation gets weight decay at the base
    # learning rate; the output regressor trains at a tenth of that rate.
    lr = 1e-4
    optimizer = torch.optim.Adam([
        {'params': net_variational_gp.representation.parameters(), 'weight_decay': 1e-4},
        {'params': net_variational_gp.output_regressor.parameters(), 'lr': lr * 0.1}
    ], lr=lr)

    for _ in range(n_epochs):
        for (g, y) in ds_tr_onebatch:
            optimizer.zero_grad()
            loss = net_variational_gp.loss(g.to(dev), y.flatten().to(dev))
            loss.backward()
            optimizer.step()

    # Evaluation needs no gradients; skipping graph construction saves memory
    # and leaves the reported numbers unchanged.
    g_tr, y_tr = ds_tr_onebatch[0]
    g_te, y_te = ds_te_onebatch[0]
    with torch.no_grad():
        train_rmse = rmse(net_variational_gp.condition(g_tr.to(dev)).mean.cpu(), y_tr).item()
        test_rmse = rmse(net_variational_gp.condition(g_te.to(dev)).mean.cpu(), y_te).item()

    print(f"Train rmse = {train_rmse}, test rmse = {test_rmse} for {kernel_name} kernel, beta = {beta}, num_inducing_points = {num_inducing}, mean = {mean_name}")
    return train_rmse, test_rmse

    

In [7]:
# Use the GPU when one is present and fall back to the CPU otherwise, so the
# sweep does not fail on machines without CUDA.
experiment_device = "cuda" if torch.cuda.is_available() else "cpu"

# Sweep every (kernel, number of inducing points) combination; each call
# trains a fresh model and prints one summary line.
for (kernel_name, kernel_params) in [
        (None, None),
        ("PolynomialKernel", {"power":3}),
        ("RBFKernel", {})
    ]:

    for num_inducing in [200, 400, 600]:
        run_experiment(num_inducing, kernel_name, kernel_params, device=experiment_device, n_epochs=2000)
    

Train rmse = 0.8605661392211914, test rmse = 0.7832201719284058 for None kernel, beta = 1.0, num_inducing_points = 200, mean = LinearMean
Train rmse = 0.7850788831710815, test rmse = 0.7508814334869385 for None kernel, beta = 1.0, num_inducing_points = 400, mean = LinearMean
Train rmse = 0.8468971848487854, test rmse = 0.8037276268005371 for None kernel, beta = 1.0, num_inducing_points = 600, mean = LinearMean
