In [3]:
import gpytorch
from gpytorch.kernels import ScaleKernel, RBFKernel, InducingPointKernel
import torch
from tqdm import tqdm
import numpy as np 
from torch.utils.data import TensorDataset, DataLoader

# This will control precision of Symeig / Cholenski Lin Alg Operations
gpytorch.settings.linalg_dtypes(default=torch.float16)
#TODO: How to ensure that all operations, data, etc. follow this precision constraint?

# Sample code for Batch Learning
def get_DataLoaders(train_x, train_y, test_x, test_y):
    train_dataset = TensorDataset(train_x, train_y)
    train_loader = DataLoader(train_dataset, batch_size=500, shuffle=True)

    test_dataset = TensorDataset(test_x, test_y)
    test_loader = DataLoader(test_dataset, batch_size=500, shuffle=False)
    return train_loader, test_loader

def train_and_test_approximate_gp(model_cls, train_x, train_y, test_y, train_loader, test_loader, num_epochs):
    inducing_points = torch.randn(128, train_x.size(-1), dtype=train_x.dtype, device=train_x.device)
    model = model_cls(inducing_points)
    
    # Likelihood
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    # Objective -> Variational Inference Uses ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.numel())
    optimizer = torch.optim.Adam(list(model.parameters()) + list(likelihood.parameters()), lr=0.1)

    if torch.cuda.is_available():
        model = model.cuda()
        likelihood = likelihood.cuda()

    # Training
    model.train()
    likelihood.train()
    
    # TQDM is just a progress bar for training
    epochs_iter = tqdm.notebook.tqdm(range(num_epochs), desc=f"Training {model_cls.__name__}")
    
    for i in epochs_iter:
        # Within each iteration, we will go over each minibatch of data
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            epochs_iter.set_postfix(loss=loss.item())
            loss.backward()
            optimizer.step()

    # Testing
    model.eval()
    likelihood.eval()
    means = torch.tensor([0.])
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            preds = model(x_batch)
            means = torch.cat([means, preds.mean.cpu()])
    means = means[1:]
    error = torch.mean(torch.abs(means - test_y.cpu()))
    print(f"Test {model_cls.__name__} MAE: {error.item()}")

In [4]:
# Extends ApproximateGP
class StandardApproximateGP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        # Needs a Variational Distribution + Variation Strategy
        #TODO: Which ones do we need to implmenet?
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(inducing_points.size(-2))
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super().__init__(variational_strategy) # Pass strategy upwards
        
        # Then Kernel and Mean as Normal
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [6]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = InducingPointKernel(gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()), inducing_points=torch.empty(1), likelihood=likelihood)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# initialize likelihood and model

In [98]:
def greedy_select_points(model, inducing_point_candidates, train_x, train_y, mll):
    """
    Inplace Addition of Inducing Point
    
    Output: inducing_point_candidates - Tensor | Returns remaining candidate set of inducing points
    """
    # TODO: Fix Potential dimension errors
    # look at .resize(), torch.cat, len(train)
    # TODO: Pop train_x_copy index
    # Get current MLL

    random_indices = np.random.permutation(len(inducing_point_candidates))
    inducing_points = model.covar_module.inducing_points
    
    # Get MLL from current inducing points
    with torch.no_grad():
            output = model(train_x)
            current_model_mll = mll(output, train_y)

    # While we haven't found a point
    for index in random_indices:
        rnd = inducing_point_candidates[index].reshape(1,-1) # TODO: Make this better
        
        # Grab a point at random, calculate its likelihood
        temp = torch.cat((inducing_points, rnd),dim=0)
        
        # Update the inducing point kernel
        model.covar_module.inducing_points = torch.nn.Parameter(temp,requires_grad=False)

        # Get MLL for model with candidate inducing point
        with torch.no_grad(): 
            rnd_point_mll = mll(model(train_x), train_y)

        # If we've increased our likelihood, we've found our point
        if rnd_point_mll.sum() > current_model_mll.sum():
            # Catch edge case where we grab the last index
            if index+1 == len(inducing_point_candidates):
                return inducing_point_candidates[0:index]
            else:
                return torch.cat((inducing_point_candidates[0:index], inducing_point_candidates[index+1:]),dim=0) 

        
    # If we couldn't increase our likelihood, get rid of the last appended inducing point
    model.covar_module.inducing_points = torch.nn.Parameter(temp[:-1],requires_grad=False)
    return None

In [99]:
import math
train_x = torch.linspace(0, 1, 100)
# True function is sin(2*pi*x) with Gaussian noise
train_y = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * math.sqrt(0.04)
train_x = train_x.reshape(-1,1)
train_y = train_y.reshape(-1,1)

In [100]:
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x, train_y, likelihood)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood,model)
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters
# "Loss" for GPs - the marginal log likelihood
max_inducing_points = 1000
inducing_point_candidates = train_x.detach().clone().reshape(-1,1)
training_iter = 1000
print(inducing_point_candidates.size(),train_x.size(), train_y.size())

for i in range(max_inducing_points):
    # If haven't gotten any inducing points, grab a random one
    if len(inducing_point_candidates) == len(train_x):
        random_index = np.random.randint(0, len(train_x))
        first_inducing_point = train_x[random_index].detach().clone().reshape(1,-1) # Get
        model.covar_module.inducing_points = torch.nn.Parameter(first_inducing_point, requires_grad=False) # Set
        # Remove Selected Point from candidate set
        inducing_point_candidates = torch.cat((inducing_point_candidates[:random_index], inducing_point_candidates[random_index + 1 :]),dim=0)
        
    elif len(model.covar_module.inducing_points) >= max_inducing_points:
        print(f"Reached limit of inducing points: we have {len(model.covar_module.inducing_points)} points with a maximum of {max_inducing_points}")
        break    
    else:
        inducing_point_candidates = greedy_select_points(model,inducing_point_candidates,train_x,train_y,mll)
        if inducing_point_candidates is None:
            # We've failed to find a point that increases our Likelihood
            print("Failed to add inducing point, breaking")
            break
        
    # Zero gradients from previous iteration
    optimizer.zero_grad()
    mll.zero_grad()
    # Output from model
    output = model(train_x)
    # Calc loss and backprop gradients
    loss = -mll(output, train_y)
    print(loss.mean())
    loss.mean().backward()
    if i % 5 == 0:
        print('Iter %d/%d - Loss: %.3f' % (i + 1, 50, loss.mean()))
    torch.cuda.empty_cache()

    optimizer.step()
    
    # TODO: Should we be training even if we stop grabbing inducing points?

torch.Size([100, 1]) torch.Size([100, 1]) torch.Size([100, 1])
tensor(0.9496, grad_fn=<MeanBackward0>)
Iter 1/50 - Loss: 0.950
tensor(0.7553, grad_fn=<MeanBackward0>)
tensor(0.7072, grad_fn=<MeanBackward0>)
tensor(0.6677, grad_fn=<MeanBackward0>)
tensor(0.6290, grad_fn=<MeanBackward0>)
tensor(0.5896, grad_fn=<MeanBackward0>)
Iter 6/50 - Loss: 0.590
tensor(0.5493, grad_fn=<MeanBackward0>)
tensor(0.5084, grad_fn=<MeanBackward0>)
tensor(0.4667, grad_fn=<MeanBackward0>)
tensor(0.4244, grad_fn=<MeanBackward0>)
tensor(0.3814, grad_fn=<MeanBackward0>)
Iter 11/50 - Loss: 0.381
Failed to add inducing point, breaking




In [101]:
model.state_dict()

OrderedDict([('likelihood.noise_covar.raw_noise', tensor([-1.1085])),
             ('likelihood.noise_covar.raw_noise_constraint.lower_bound',
              tensor(1.0000e-04)),
             ('likelihood.noise_covar.raw_noise_constraint.upper_bound',
              tensor(inf)),
             ('mean_module.raw_constant', tensor(-0.0521)),
             ('covar_module.inducing_points',
              tensor([[0.9091],
                      [0.5051],
                      [0.6465],
                      [0.1818],
                      [0.2424],
                      [0.5253],
                      [0.8687],
                      [0.9697],
                      [0.0404],
                      [0.4242],
                      [0.7475]])),
             ('covar_module.base_kernel.raw_outputscale', tensor(-0.5443)),
             ('covar_module.base_kernel.base_kernel.raw_lengthscale',
              tensor([[0.6043]])),
             ('covar_module.base_kernel.base_kernel.raw_lengthscale_constraint.

In [11]:
for param_name, param in model.named_parameters():
    print(param_name)
    print(param)

likelihood.noise_covar.raw_noise
Parameter containing:
tensor([-1.2103], requires_grad=True)
mean_module.raw_constant
Parameter containing:
tensor(-0.0336, requires_grad=True)
covar_module.inducing_points
Parameter containing:
tensor([0.8281, 0.2827, 0.5352, 0.5454, 0.2727, 0.8184, 0.6162, 0.8584, 0.6362,
        0.9194, 0.8887, 0.5557, 0.4243, 0.6265, 0.3535, 0.3232, 0.9292, 0.1515,
        0.3738, 0.6665, 0.0909, 0.9897, 0.3435, 0.1313, 0.1919, 0.0000, 0.7578,
        0.2930, 0.1010, 0.8687, 0.7173, 0.4949, 0.0101, 0.7070, 0.4546, 0.2424,
        0.8486, 0.8989, 0.4343, 0.3333, 0.0202, 0.4041, 0.4849],
       dtype=torch.float16)
covar_module.base_kernel.raw_outputscale
Parameter containing:
tensor(-0.6861, requires_grad=True)
covar_module.base_kernel.base_kernel.raw_lengthscale
Parameter containing:
tensor([[0.4648]], requires_grad=True)


In [54]:
a = torch.Tensor([[1,2,3,4],[5,6,7,8]])
b = torch.Tensor([[10,11],[12,14]])

In [68]:
a = a.reshape(-1,2,2)
b = b.reshape(-1,2,2)
print(a,b)
print(a.size(),b.size())

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]]) tensor([[[10., 11.],
         [12., 14.]]])
torch.Size([2, 2, 2]) torch.Size([1, 2, 2])


In [69]:
c = torch.cat((a,b),dim=0)

In [70]:
c.size()

torch.Size([3, 2, 2])

In [72]:
a

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])

In [73]:
d = torch.Tensor([1])

In [76]:
from LowPrecisionApproxGP.util import greedy_select_points, greedy_train

ModuleNotFoundError: No module named 'LowPrecisionApproxGP'