# Introduction
As described in the repo README, the experiment runner provides the fixture for running experiments on UCI data sets. One can also use this repo as a module providing an API to the RPA-GP family of GP models. 

Below are demonstrations on how to create and use these models on example data.

In [1]:
# Preliminary imports
import torch
from torch.optim import Adam
from gpytorch.kernels import RBFKernel, ScaleKernel, AdditiveStructureKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood

7.4.0


In [2]:
from gp_experiment_runner import _determine_folds, _access_fold, _normalize_by_train

In [3]:
# Load a data set
from gp_experiment_runner import load_dataset  # assumes UCI datasets are available locally
dataset = load_dataset('yacht') # returns a Pandas dataframe

n_train = len(dataset) // 10 * 9
train = dataset.iloc[:n_train]
test = dataset.iloc[n_train:]

# Or use utilities from repo:
# from gp_experiment_runner import _determine_folds, _access_fold, _normalize_by_train
# folds = _determine_folds(0.1, dataset)
# train, test = _access_fold(dataset, folds, 0)
# train, test = _normalize_by_train(train, test)

train_x = torch.tensor(train.iloc[:, :-1].values, dtype=torch.float)
train_y = torch.tensor(train.iloc[:, -1].values, dtype=torch.float)
test_x = torch.tensor(test.iloc[:, :-1].values, dtype=torch.float)
test_y = torch.tensor(test.iloc[:, -1].values, dtype=torch.float)

### Create an RPA-GP model

In [4]:
from gp_models import ExactGPModel, ScaledProjectionKernel
import rp
n, d = train_x.shape
num_projs = 20

# Draw random projections and store in a linear module
# Here, we are drawing 20 Gaussian projections into 1 dimension.
projs = [rp.gen_rp(d, 1, dist='gaussian') for _ in range(num_projs)]
proj_module = torch.nn.Linear(d, num_projs, bias=False)
proj_module.weight.data = torch.cat(projs, dim=1).t()

# Create the additive model that operates over these projections
# Fixing the outputscale and lengthscale of the base kernels.
base_kernel = RBFKernel()
base_kernel.initialize(lengthscale=torch.tensor([1.]))
base_kernel = ScaleKernel(base_kernel)
base_kernel.initialize(outputscale=torch.tensor([1/num_projs]))

# Combine into a single module.
kernel = ScaledProjectionKernel(proj_module, base_kernel, 
                                prescale=False,
                                ard_num_dims=num_projs,
                                learn_proj=False)
# Or, just call the method from training_routines that wraps this initialization
# from training_routines import create_additive_rp_kernel
# create_additive_rp_kernel(d, num_projs, learn_proj=False, kernel_type='RBF', 
#                           space_proj=False, prescale=False, ard=True, k=1, 
#                           proj_dist='gaussian')

kernel = ScaleKernel(kernel) # Optionally wrap with an additional ScaleKernel 

# Create an ExactGP model with this kernel
likelihood = GaussianLikelihood()
likelihood.noise = 1.
model = ExactGPModel(train_x, train_y, likelihood, kernel)

### Train the model

In [5]:
# Train the model
mll = ExactMarginalLogLikelihood(model.likelihood, model)
mll.train()
optimizer = Adam(mll.parameters(), lr=0.01)
for iteration in range(1000):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    optimizer.step()
    msg = "Iter {}: Loss={:2.4f}, Noise={:2.4f}".format(iteration, loss.item(), model.likelihood.noise.item())
    print(msg)

Iter 0: Loss=1.4288, Noise=0.9937
Iter 1: Loss=1.4287, Noise=0.9893
Iter 2: Loss=1.4288, Noise=0.9862
Iter 3: Loss=1.4290, Noise=0.9824
Iter 4: Loss=1.4288, Noise=0.9824
Iter 5: Loss=1.4289, Noise=0.9836
Iter 6: Loss=1.4286, Noise=0.9857
Iter 7: Loss=1.4286, Noise=0.9879
Iter 8: Loss=1.4285, Noise=0.9907
Iter 9: Loss=1.4286, Noise=0.9941
Iter 10: Loss=1.4288, Noise=0.9972
Iter 11: Loss=1.4286, Noise=0.9992
Iter 12: Loss=1.4287, Noise=0.9998
Iter 13: Loss=1.4288, Noise=0.9990
Iter 14: Loss=1.4287, Noise=0.9976
Iter 15: Loss=1.4287, Noise=0.9952
Iter 16: Loss=1.4288, Noise=0.9918
Iter 17: Loss=1.4288, Noise=0.9883
Iter 18: Loss=1.4288, Noise=0.9851
Iter 19: Loss=1.4287, Noise=0.9811
Iter 20: Loss=1.4286, Noise=0.9770
Iter 21: Loss=1.4289, Noise=0.9738
Iter 22: Loss=1.4286, Noise=0.9699
Iter 23: Loss=1.4287, Noise=0.9671
Iter 24: Loss=1.4288, Noise=0.9654
Iter 25: Loss=1.4288, Noise=0.9635
Iter 26: Loss=1.4289, Noise=0.9611
Iter 27: Loss=1.4289, Noise=0.9600
Iter 28: Loss=1.4289, Noise=0.

Iter 231: Loss=1.4289, Noise=0.9791
Iter 232: Loss=1.4285, Noise=0.9789
Iter 233: Loss=1.4289, Noise=0.9787
Iter 234: Loss=1.4288, Noise=0.9787
Iter 235: Loss=1.4288, Noise=0.9788
Iter 236: Loss=1.4287, Noise=0.9798
Iter 237: Loss=1.4287, Noise=0.9811
Iter 238: Loss=1.4286, Noise=0.9822
Iter 239: Loss=1.4288, Noise=0.9821
Iter 240: Loss=1.4287, Noise=0.9817
Iter 241: Loss=1.4288, Noise=0.9812
Iter 242: Loss=1.4288, Noise=0.9818
Iter 243: Loss=1.4286, Noise=0.9835
Iter 244: Loss=1.4288, Noise=0.9853
Iter 245: Loss=1.4287, Noise=0.9864
Iter 246: Loss=1.4287, Noise=0.9877
Iter 247: Loss=1.4287, Noise=0.9890
Iter 248: Loss=1.4287, Noise=0.9896
Iter 249: Loss=1.4289, Noise=0.9901
Iter 250: Loss=1.4290, Noise=0.9903
Iter 251: Loss=1.4288, Noise=0.9900
Iter 252: Loss=1.4287, Noise=0.9896
Iter 253: Loss=1.4288, Noise=0.9897
Iter 254: Loss=1.4288, Noise=0.9896
Iter 255: Loss=1.4285, Noise=0.9894
Iter 256: Loss=1.4287, Noise=0.9896
Iter 257: Loss=1.4288, Noise=0.9899
Iter 258: Loss=1.4287, Noise

Iter 461: Loss=1.4289, Noise=1.0047
Iter 462: Loss=1.4287, Noise=1.0046
Iter 463: Loss=1.4288, Noise=1.0050
Iter 464: Loss=1.4286, Noise=1.0056
Iter 465: Loss=1.4288, Noise=1.0059
Iter 466: Loss=1.4287, Noise=1.0063
Iter 467: Loss=1.4289, Noise=1.0067
Iter 468: Loss=1.4289, Noise=1.0072
Iter 469: Loss=1.4288, Noise=1.0068
Iter 470: Loss=1.4287, Noise=1.0054
Iter 471: Loss=1.4289, Noise=1.0046
Iter 472: Loss=1.4288, Noise=1.0035
Iter 473: Loss=1.4287, Noise=1.0016
Iter 474: Loss=1.4288, Noise=1.0000
Iter 475: Loss=1.4288, Noise=0.9980
Iter 476: Loss=1.4288, Noise=0.9955
Iter 477: Loss=1.4286, Noise=0.9938
Iter 478: Loss=1.4287, Noise=0.9927
Iter 479: Loss=1.4288, Noise=0.9908
Iter 480: Loss=1.4287, Noise=0.9893
Iter 481: Loss=1.4286, Noise=0.9878
Iter 482: Loss=1.4288, Noise=0.9873
Iter 483: Loss=1.4286, Noise=0.9875
Iter 484: Loss=1.4286, Noise=0.9876
Iter 485: Loss=1.4287, Noise=0.9887
Iter 486: Loss=1.4287, Noise=0.9895
Iter 487: Loss=1.4285, Noise=0.9898
Iter 488: Loss=1.4288, Noise

Iter 691: Loss=1.4288, Noise=0.9831
Iter 692: Loss=1.4287, Noise=0.9856
Iter 693: Loss=1.4287, Noise=0.9883
Iter 694: Loss=1.4287, Noise=0.9905
Iter 695: Loss=1.4285, Noise=0.9929
Iter 696: Loss=1.4287, Noise=0.9936
Iter 697: Loss=1.4288, Noise=0.9943
Iter 698: Loss=1.4286, Noise=0.9951
Iter 699: Loss=1.4288, Noise=0.9952
Iter 700: Loss=1.4286, Noise=0.9949
Iter 701: Loss=1.4288, Noise=0.9937
Iter 702: Loss=1.4287, Noise=0.9920
Iter 703: Loss=1.4287, Noise=0.9896
Iter 704: Loss=1.4287, Noise=0.9865
Iter 705: Loss=1.4286, Noise=0.9834
Iter 706: Loss=1.4287, Noise=0.9808
Iter 707: Loss=1.4287, Noise=0.9783
Iter 708: Loss=1.4287, Noise=0.9756
Iter 709: Loss=1.4287, Noise=0.9734
Iter 710: Loss=1.4286, Noise=0.9726
Iter 711: Loss=1.4287, Noise=0.9724
Iter 712: Loss=1.4287, Noise=0.9721
Iter 713: Loss=1.4286, Noise=0.9723
Iter 714: Loss=1.4285, Noise=0.9733
Iter 715: Loss=1.4287, Noise=0.9748
Iter 716: Loss=1.4285, Noise=0.9770
Iter 717: Loss=1.4287, Noise=0.9789
Iter 718: Loss=1.4287, Noise

Iter 920: Loss=1.4287, Noise=0.9853
Iter 921: Loss=1.4287, Noise=0.9867
Iter 922: Loss=1.4287, Noise=0.9875
Iter 923: Loss=1.4288, Noise=0.9884
Iter 924: Loss=1.4287, Noise=0.9894
Iter 925: Loss=1.4286, Noise=0.9902
Iter 926: Loss=1.4289, Noise=0.9901
Iter 927: Loss=1.4286, Noise=0.9907
Iter 928: Loss=1.4287, Noise=0.9909
Iter 929: Loss=1.4286, Noise=0.9902
Iter 930: Loss=1.4286, Noise=0.9883
Iter 931: Loss=1.4288, Noise=0.9868
Iter 932: Loss=1.4287, Noise=0.9859
Iter 933: Loss=1.4287, Noise=0.9859
Iter 934: Loss=1.4289, Noise=0.9850
Iter 935: Loss=1.4284, Noise=0.9838
Iter 936: Loss=1.4287, Noise=0.9824
Iter 937: Loss=1.4288, Noise=0.9815
Iter 938: Loss=1.4288, Noise=0.9813
Iter 939: Loss=1.4287, Noise=0.9824
Iter 940: Loss=1.4287, Noise=0.9841
Iter 941: Loss=1.4287, Noise=0.9874
Iter 942: Loss=1.4288, Noise=0.9903
Iter 943: Loss=1.4288, Noise=0.9935
Iter 944: Loss=1.4287, Noise=0.9969
Iter 945: Loss=1.4286, Noise=0.9990
Iter 946: Loss=1.4289, Noise=1.0020
Iter 947: Loss=1.4286, Noise

In [6]:
# Test the model
model.eval()
output = model(test_x)
rmse = (test_y - output.mean).pow(2).mean().sqrt().item()
neg_log_lik = -mll(output, test_y).item()
print('RMSE={:2.4f}, Neg Log Likelihood={:2.4f}'.format(rmse, neg_log_lik))

RMSE=0.9146, Neg Log Likelihood=1.3390


### Create an DPA-GP-ARD model

In [7]:
# Draw random projections, `diversify` and store in a linear module
# Here, we are drawing 20 Gaussian projections into 1 dimension.
projs = [rp.gen_rp(d, 1, dist='gaussian') for _ in range(num_projs)] # initial directions

newW, _ = rp.space_equally(torch.cat(projs,dim=1).t(), lr=0.1, niter=5000) # Try to diversify
newW.requires_grad = False # Make sure they aren't trainable
projs = [newW[i:i+1, :].t() for i in range(0, num_projs, 1)]  # Reshape like initial directions

proj_module = torch.nn.Linear(d, num_projs, bias=False)
proj_module.weight.data = torch.cat(projs, dim=1).t()

# Create the additive model that operates over these projections
# Fixing the outputscale and lengthscale of the base kernels.
base_kernel = RBFKernel()
base_kernel.initialize(lengthscale=torch.tensor([1.]))
base_kernel = ScaleKernel(base_kernel)
base_kernel.initialize(outputscale=torch.tensor([1/num_projs]))

# Combine into a single module.
# Using prescale=True applies lengthscales to original input space, i.e. scaling pre-projection.
kernel = ScaledProjectionKernel(proj_module, base_kernel, 
                                prescale=True,  
                                ard_num_dims=d,
                                learn_proj=False)
# Or, just call the method from training_routines that wraps this initialization
# from training_routines import create_additive_rp_kernel
# create_additive_rp_kernel(d, num_projs, learn_proj=False, kernel_type='RBF', 
#                           space_proj=True, prescale=True, ard=True, k=1, 
#                           proj_dist='gaussian')

kernel = ScaleKernel(kernel) # Optionally wrap with an additional ScaleKernel 

# Create an ExactGP model with this kernel
likelihood = GaussianLikelihood()
likelihood.noise = 1.
model = ExactGPModel(train_x, train_y, GaussianLikelihood(), kernel)

### Train the model

In [8]:
# Train the model
mll = ExactMarginalLogLikelihood(model.likelihood, model)
model.train()
model.likelihood.train()
optimizer = Adam(mll.parameters(), lr=0.01)
for iteration in range(1000):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    optimizer.step()
    msg = "Iter {}: Loss={:2.4f}, Noise={:2.4f}".format(iteration, loss.item(), model.likelihood.noise.item())
    print(msg)

Iter 0: Loss=1.4611, Noise=0.6983
Iter 1: Loss=1.4591, Noise=0.7033
Iter 2: Loss=1.4581, Noise=0.7084
Iter 3: Loss=1.4563, Noise=0.7134
Iter 4: Loss=1.4553, Noise=0.7185
Iter 5: Loss=1.4539, Noise=0.7236
Iter 6: Loss=1.4528, Noise=0.7287
Iter 7: Loss=1.4515, Noise=0.7337
Iter 8: Loss=1.4502, Noise=0.7387
Iter 9: Loss=1.4492, Noise=0.7437
Iter 10: Loss=1.4483, Noise=0.7486
Iter 11: Loss=1.4474, Noise=0.7535
Iter 12: Loss=1.4466, Noise=0.7584
Iter 13: Loss=1.4452, Noise=0.7633
Iter 14: Loss=1.4446, Noise=0.7682
Iter 15: Loss=1.4438, Noise=0.7731
Iter 16: Loss=1.4430, Noise=0.7779
Iter 17: Loss=1.4421, Noise=0.7827
Iter 18: Loss=1.4414, Noise=0.7875
Iter 19: Loss=1.4406, Noise=0.7921
Iter 20: Loss=1.4395, Noise=0.7969
Iter 21: Loss=1.4391, Noise=0.8015
Iter 22: Loss=1.4388, Noise=0.8062
Iter 23: Loss=1.4380, Noise=0.8109
Iter 24: Loss=1.4374, Noise=0.8155
Iter 25: Loss=1.4368, Noise=0.8201
Iter 26: Loss=1.4362, Noise=0.8247
Iter 27: Loss=1.4358, Noise=0.8292
Iter 28: Loss=1.4351, Noise=0.

Iter 236: Loss=1.3931, Noise=0.8487
Iter 237: Loss=1.3912, Noise=0.8453
Iter 238: Loss=1.3921, Noise=0.8418
Iter 239: Loss=1.3904, Noise=0.8382
Iter 240: Loss=1.3860, Noise=0.8345
Iter 241: Loss=1.3864, Noise=0.8307
Iter 242: Loss=1.3869, Noise=0.8270
Iter 243: Loss=1.3845, Noise=0.8234
Iter 244: Loss=1.3837, Noise=0.8197
Iter 245: Loss=1.3827, Noise=0.8161
Iter 246: Loss=1.3823, Noise=0.8125
Iter 247: Loss=1.3819, Noise=0.8089
Iter 248: Loss=1.3817, Noise=0.8054
Iter 249: Loss=1.3793, Noise=0.8018
Iter 250: Loss=1.3783, Noise=0.7981
Iter 251: Loss=1.3770, Noise=0.7944
Iter 252: Loss=1.3747, Noise=0.7907
Iter 253: Loss=1.3742, Noise=0.7869
Iter 254: Loss=1.3741, Noise=0.7830
Iter 255: Loss=1.3733, Noise=0.7789
Iter 256: Loss=1.3707, Noise=0.7748
Iter 257: Loss=1.3703, Noise=0.7705
Iter 258: Loss=1.3695, Noise=0.7661
Iter 259: Loss=1.3665, Noise=0.7618
Iter 260: Loss=1.3660, Noise=0.7575
Iter 261: Loss=1.3662, Noise=0.7532
Iter 262: Loss=1.3617, Noise=0.7489
Iter 263: Loss=1.3624, Noise

Iter 465: Loss=0.7192, Noise=0.0449
Iter 466: Loss=0.6982, Noise=0.0444
Iter 467: Loss=0.6868, Noise=0.0438
Iter 468: Loss=0.6968, Noise=0.0433
Iter 469: Loss=0.7079, Noise=0.0428
Iter 470: Loss=0.6471, Noise=0.0423
Iter 471: Loss=0.6836, Noise=0.0418
Iter 472: Loss=0.7049, Noise=0.0413
Iter 473: Loss=0.6410, Noise=0.0408
Iter 474: Loss=0.6784, Noise=0.0403
Iter 475: Loss=0.6943, Noise=0.0398
Iter 476: Loss=0.6646, Noise=0.0394
Iter 477: Loss=0.6339, Noise=0.0389
Iter 478: Loss=0.6752, Noise=0.0385
Iter 479: Loss=0.6788, Noise=0.0380
Iter 480: Loss=0.6647, Noise=0.0376
Iter 481: Loss=0.6417, Noise=0.0371
Iter 482: Loss=0.6307, Noise=0.0367
Iter 483: Loss=0.6724, Noise=0.0363
Iter 484: Loss=0.6697, Noise=0.0359
Iter 485: Loss=0.6904, Noise=0.0355
Iter 486: Loss=0.6433, Noise=0.0351
Iter 487: Loss=0.6461, Noise=0.0347
Iter 488: Loss=0.6522, Noise=0.0343
Iter 489: Loss=0.6363, Noise=0.0340
Iter 490: Loss=0.6407, Noise=0.0336
Iter 491: Loss=0.6291, Noise=0.0332
Iter 492: Loss=0.6267, Noise

Iter 693: Loss=0.3746, Noise=0.0093
Iter 694: Loss=0.3958, Noise=0.0093
Iter 695: Loss=0.4249, Noise=0.0093
Iter 696: Loss=0.3949, Noise=0.0092
Iter 697: Loss=0.3949, Noise=0.0092
Iter 698: Loss=0.3866, Noise=0.0092
Iter 699: Loss=0.3870, Noise=0.0092
Iter 700: Loss=0.3977, Noise=0.0091
Iter 701: Loss=0.3624, Noise=0.0091
Iter 702: Loss=0.3890, Noise=0.0091
Iter 703: Loss=0.3981, Noise=0.0091
Iter 704: Loss=0.3933, Noise=0.0090
Iter 705: Loss=0.3681, Noise=0.0090
Iter 706: Loss=0.3862, Noise=0.0090
Iter 707: Loss=0.3928, Noise=0.0090
Iter 708: Loss=0.3758, Noise=0.0089
Iter 709: Loss=0.3571, Noise=0.0089
Iter 710: Loss=0.3494, Noise=0.0089
Iter 711: Loss=0.4096, Noise=0.0089
Iter 712: Loss=0.3800, Noise=0.0088
Iter 713: Loss=0.3856, Noise=0.0088
Iter 714: Loss=0.3888, Noise=0.0088
Iter 715: Loss=0.3622, Noise=0.0088
Iter 716: Loss=0.3661, Noise=0.0088
Iter 717: Loss=0.3882, Noise=0.0087
Iter 718: Loss=0.3801, Noise=0.0087
Iter 719: Loss=0.3620, Noise=0.0087
Iter 720: Loss=0.3999, Noise

Iter 921: Loss=0.3006, Noise=0.0069
Iter 922: Loss=0.2771, Noise=0.0069
Iter 923: Loss=0.2909, Noise=0.0069
Iter 924: Loss=0.2895, Noise=0.0069
Iter 925: Loss=0.3229, Noise=0.0069
Iter 926: Loss=0.2950, Noise=0.0069
Iter 927: Loss=0.2773, Noise=0.0069
Iter 928: Loss=0.2914, Noise=0.0069
Iter 929: Loss=0.3101, Noise=0.0069
Iter 930: Loss=0.3202, Noise=0.0069
Iter 931: Loss=0.2933, Noise=0.0069
Iter 932: Loss=0.3314, Noise=0.0069
Iter 933: Loss=0.2874, Noise=0.0069
Iter 934: Loss=0.3178, Noise=0.0069
Iter 935: Loss=0.3051, Noise=0.0069
Iter 936: Loss=0.3162, Noise=0.0069
Iter 937: Loss=0.2740, Noise=0.0069
Iter 938: Loss=0.3202, Noise=0.0069
Iter 939: Loss=0.3081, Noise=0.0069
Iter 940: Loss=0.2592, Noise=0.0069
Iter 941: Loss=0.2610, Noise=0.0069
Iter 942: Loss=0.2842, Noise=0.0069
Iter 943: Loss=0.2868, Noise=0.0069
Iter 944: Loss=0.3304, Noise=0.0069
Iter 945: Loss=0.3001, Noise=0.0069
Iter 946: Loss=0.2440, Noise=0.0069
Iter 947: Loss=0.3413, Noise=0.0069
Iter 948: Loss=0.2889, Noise

In [9]:
# Test the model
with torch.no_grad():
    model.eval()
    model.likelihood.eval()
    output = model(test_x)
    rmse = (test_y - output.mean).pow(2).mean().sqrt().item()
    neg_log_lik = -mll(output, test_y).item()
    print('RMSE={:2.4f}, Neg Log Likelihood={:2.4f}'.format(rmse, neg_log_lik))

RMSE=0.8409, Neg Log Likelihood=0.1101
