### GPyTorch evals

Evaluates GPyTorch on some of the datasets on
which we benchmarked stochastic variational inference.

In [1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt


import sklearn, matplotlib.pyplot as plt, numpy as np, os, time
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from scipy.stats import spearmanr
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel

# Run from the benchmark_evals directory: if the current working directory's
# path does not contain "benchmark_evals", step over to the sibling directory.
# NOTE(review): this is a substring test on the whole path, so any ancestor
# directory whose name contains "benchmark_evals" also satisfies it.
if "benchmark_evals" not in os.getcwd():
    os.chdir(os.path.join("..", "benchmark_evals"))

# Working directory after the optional chdir; the dataset paths in later
# cells are built by joining onto this.
home = os.getcwd()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP regression model with a constant mean and a scaled RBF kernel."""

    def __init__(self, train_x, train_y, likelihood):
        """Register the training data and likelihood, then build mean and kernel modules."""
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        rbf_kernel = gpytorch.kernels.RBFKernel()
        self.covar_module = gpytorch.kernels.ScaleKernel(rbf_kernel)

    def forward(self, x):
        """Return the multivariate normal given by the mean and kernel modules at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

In [3]:
def _load_xy_shards(target_dir, flatten_x=False):
    """Load and stack every ``*xvalues.npy`` / ``*yvalues.npy`` pair in a directory.

    Files are matched by position after sorting the file names, so the i-th
    x file is paired with the i-th y file.

    Args:
        target_dir: Directory containing the ``.npy`` shards.
        flatten_x: If True, collapse all axes after the first of each x shard
            into a single axis before stacking.

    Returns:
        Tuple ``(x, y)``: x shards cast to float32 and stacked with
        ``np.vstack``; y shards joined with ``np.concatenate`` (dtype as stored).

    Raises:
        ValueError: If no x shards are found, or the number of x and y shards
            differs (index pairing would silently misalign the data).
    """
    # List the directory directly instead of os.chdir-ing into it, so the
    # process working directory is left untouched for later relative paths.
    filenames = sorted(os.listdir(target_dir))
    xfiles = [f for f in filenames if f.endswith("xvalues.npy")]
    yfiles = [f for f in filenames if f.endswith("yvalues.npy")]

    if not xfiles:
        raise ValueError(f"No '*xvalues.npy' files found in {target_dir!r}")
    if len(xfiles) != len(yfiles):
        raise ValueError(
            f"Found {len(xfiles)} x files but {len(yfiles)} y files in {target_dir!r}"
        )

    x, y = [], []
    for xfile, yfile in zip(xfiles, yfiles):
        xshard = np.load(os.path.join(target_dir, xfile)).astype(np.float32)
        if flatten_x:
            # An explicit product (rather than -1) keeps zero-row shards reshapeable.
            n_features = int(np.prod(xshard.shape[1:]))
            xshard = xshard.reshape((xshard.shape[0], n_features))
        x.append(xshard)
        y.append(np.load(os.path.join(target_dir, yfile)))

    return np.vstack(x), np.concatenate(y)


def get_xy(target_dir):
    """Load all x/y shards from ``target_dir`` and return them stacked.

    Args:
        target_dir: Directory holding ``*xvalues.npy`` and ``*yvalues.npy`` files.

    Returns:
        Tuple ``(x, y)`` of stacked float32 inputs and concatenated targets.

    Raises:
        ValueError: If the directory has no x shards or unequal x/y shard counts.
    """
    return _load_xy_shards(target_dir, flatten_x=False)


def get_flattened_xy(target_dir):
    """Load all x/y shards from ``target_dir``, flattening each x sample to 1-D.

    Every x shard is reshaped to ``(n_samples, product of remaining axes)``
    before stacking.

    Args:
        target_dir: Directory holding ``*xvalues.npy`` and ``*yvalues.npy`` files.

    Returns:
        Tuple ``(x, y)`` of stacked, flattened float32 inputs and concatenated targets.

    Raises:
        ValueError: If the directory has no x shards or unequal x/y shard counts.
    """
    return _load_xy_shards(target_dir, flatten_x=True)

In [4]:
def model_training(init_model, init_likelihood, train_x, train_y,
                   training_iter=50, lr=0.1):
    """Fit GP hyperparameters by maximising the exact marginal log likelihood.

    Runs ``training_iter`` Adam steps on the parameters of ``init_model``,
    prints progress every fifth iteration, leaves the model and likelihood in
    eval mode, and prints the elapsed wallclock time.

    Args:
        init_model: The GP model to train (modified in place).
        init_likelihood: The likelihood paired with ``init_model``.
        train_x: Training inputs passed to ``init_model``.
        train_y: Training targets passed to the marginal log likelihood.
        training_iter: Number of optimizer steps (default 50).
        lr: Adam learning rate (default 0.1).

    Returns:
        None. Results are reported via ``print``.
    """
    init_time = time.time()
    init_model.train()
    init_likelihood.train()

    # Optimise the model that was passed in, not whatever notebook-global
    # happens to be named `model`.
    # NOTE(review): assumes init_model.parameters() also yields the
    # likelihood's parameters — confirm for the model class in use.
    optimizer = torch.optim.Adam(init_model.parameters(), lr=lr)

    # Likewise build the loss from the likelihood argument, not a global.
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(init_likelihood, init_model)

    for i in range(training_iter):
        optimizer.zero_grad()
        output = init_model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        if i % 5 == 0:
            print(f'Iter {i+1}')
        optimizer.step()

    init_model.eval()
    init_likelihood.eval()

    # CUDA work is queued asynchronously; wait for it to finish so the
    # reported wallclock covers completed computation, not just kernel launches.
    if getattr(train_x, "is_cuda", False):
        torch.cuda.synchronize()
    end_time = time.time()
    print(f"Wallclock time: {end_time - init_time}")
    
def eval_model(init_model, init_likelihood, test_x, test_y):
    """Print the Spearman correlation between the model's predictive mean and ``test_y``.

    Both modules are switched to eval mode. ``test_x`` is converted to a
    float32 tensor on the same device as ``init_model``'s parameters (CPU if
    the model has none), the model is evaluated once under ``torch.no_grad``,
    and the ``.mean`` of its output is compared with ``test_y`` via
    ``scipy.stats.spearmanr``.

    Args:
        init_model: Trained model; calling it must return an object with a
            ``.mean`` tensor.
        init_likelihood: Likelihood paired with the model. It is only put into
            eval mode here; predictions use the latent mean.
        test_x: Test inputs (array-like or tensor).
        test_y: Test targets, passed to ``spearmanr`` unchanged.

    Returns:
        None. The ``spearmanr`` result is printed.
    """
    init_model.eval()
    init_likelihood.eval()

    # Follow the model's device rather than hard-coding .cuda(), so the same
    # function works for CPU and GPU models.
    first_param = next(init_model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    test_x = torch.as_tensor(test_x, dtype=torch.float32, device=device)

    with torch.no_grad():
        # One forward pass through the model argument. The original ran a
        # second pass through the global `model` and also computed a variance,
        # neither of which was ever used.
        f_mean = init_model(test_x).mean

    f_mean = f_mean.cpu().numpy()
    print(spearmanr(f_mean, test_y))

In [6]:
# GPyTorch exact GP on the GB1 one-hot "two_vs_rest" split.
trainx, trainy = get_xy(os.path.join(home, "gb1_eval", "onehot", "two_vs_rest", "train"))
testx, testy = get_xy(os.path.join(home, "gb1_eval", "onehot", "two_vs_rest", "test"))

# Training data goes to the GPU as float32 tensors; the test arrays stay as
# returned by get_xy and are handed to eval_model below.
trainx = torch.tensor(trainx).float().cuda()
trainy = torch.tensor(trainy).float().cuda()

# Gaussian-likelihood exact GP, moved to the GPU.
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(trainx, trainy, likelihood)
model.cuda()

model_training(model, likelihood, trainx, trainy)


# Drops this cell's name for the training inputs.
# NOTE(review): the model was constructed from trainx, so this alone may not
# release the GPU memory — confirm.
del trainx

eval_model(model, likelihood, testx, testy)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:1672.)
  res = torch.triangular_solve(right_tensor, self.evaluate(), upper=self.upper).solution


Iter 1
Iter 6
Iter 11
Iter 16
Iter 21
Iter 26
Iter 31
Iter 36
Iter 41
Iter 46
Wallclock time: 0.38500285148620605
SpearmanrResult(correlation=0.6358094289204631, pvalue=0.0)


In [5]:
# GPyTorch exact GP on the fluorescence "onehot_conv" standard split.
# get_flattened_xy reshapes each x shard to 2-D before stacking.
trainx, trainy = get_flattened_xy(os.path.join(home, "fluorescence_eval", "onehot_conv", "standard", "train"))
testx, testy = get_flattened_xy(os.path.join(home, "fluorescence_eval", "onehot_conv", "standard", "test"))

# Training data goes to the GPU as float32 tensors; the test arrays stay as
# returned by get_flattened_xy and are handed to eval_model below.
trainx = torch.tensor(trainx).float().cuda()
trainy = torch.tensor(trainy).float().cuda()

# Gaussian-likelihood exact GP, moved to the GPU.
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(trainx, trainy, likelihood)
model.cuda()

# The recorded output of this cell is a CUDA out-of-memory RuntimeError.
model_training(model, likelihood, trainx, trainy)


# Drops this cell's name for the training inputs.
# NOTE(review): the model was constructed from trainx, so this alone may not
# release the GPU memory — confirm.
del trainx

eval_model(model, likelihood, testx, testy)

RuntimeError: CUDA out of memory. Tried to allocate 2.68 GiB (GPU 0; 7.93 GiB total capacity; 5.85 GiB already allocated; 1.25 GiB free; 5.86 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# The exact GP runs out of memory on a GPU with 8 GB of memory (the same GPU we
# used to run the SVI vs xGPR benchmark tests), so the GPyTorch evaluation stops here.

In [4]:
# scikit-learn exact GP on the GB1 one-hot "three_vs_rest" split.
x, y = get_xy(os.path.join(home, "gb1_eval", "onehot", "three_vs_rest", "train"))
testx, testy = get_xy(os.path.join(home, "gb1_eval", "onehot", "three_vs_rest", "test"))
# Kernel: RBF times a constant, plus a white-noise term, all with default arguments.
kernel = RBF() * ConstantKernel() + WhiteKernel()
# The timer brackets only the .fit call (with 5 optimizer restarts);
# prediction below is not included in the printed wallclock.
wclock = time.time()
model = GPR(kernel=kernel, random_state=123, n_restarts_optimizer=5).fit(x,y)
print(time.time() - wclock)
preds = model.predict(testx)
# Last expression of the cell, so the Spearman result is shown as the cell output.
spearmanr(preds, testy)



572.743899345398


SpearmanrResult(correlation=0.836091045184536, pvalue=0.0)