In [72]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score,train_test_split
import torch
import gpytorch
from gpytorch.models import ExactGP
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import GaussianLikelihood
import tensorflow
from typing import Optional, Tuple

import torch
from linear_operator import to_dense
from gpytorch.constraints import Positive
from gpytorch.kernels import Kernel
from IPython.display import Markdown, display
def printmd(string):
    display(Markdown(string))
from sklearn.preprocessing import StandardScaler
from gpytorch.settings import cholesky_jitter


In [73]:

# Load the dataset
data = pd.read_csv('Real estate.csv') #https://www.kaggle.com/datasets/quantbruce/real-estate-price-prediction?resource=download
data = data.drop(['No'], axis=1)
data.info()
data = data.dropna()
# Display the first few rows of the DataFrame
# print(data.head())
x = data.drop(columns=['Y house price of unit area'])
y = data['Y house price of unit area']
train_x,test_x,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)


# Normalize the features
scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_x) # computes the mean and standard deviation for each feature in train_x and scales train_x using these statistics
test_x_scaled = scaler.transform(test_x)


# Convert the pandas DataFrame/Series to PyTorch tensors
train_x = torch.tensor(train_x_scaled, dtype=torch.float32)
test_x = torch.tensor(test_x_scaled, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

# print(train_x)
# print(y_train.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 7 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   X1 transaction date                     414 non-null    float64
 1   X2 house age                            414 non-null    float64
 2   X3 distance to the nearest MRT station  414 non-null    float64
 3   X4 number of convenience stores         414 non-null    int64  
 4   X5 latitude                             414 non-null    float64
 5   X6 longitude                            414 non-null    float64
 6   Y house price of unit area              414 non-null    float64
dtypes: float64(6), int64(1)
memory usage: 22.8 KB


# Newton Girard Formulae

## the Newton Girard Formulae kernel function

In [74]:
class NewtonGirardAdditiveKernel(Kernel):
    def __init__(
        self,
        base_kernel: Kernel,
        num_dims: int,
        max_degree: Optional[int] = None,
        active_dims: Optional[Tuple[int, ...]] = None,
        **kwargs,
    ):
        """Create an Additive Kernel a la https://arxiv.org/abs/1112.4394 using Newton-Girard Formulae

        :param base_kernel: a base 1-dimensional kernel. NOTE: put ard_num_dims=d in the base kernel...
        :param max_degree: the maximum numbers of kernel degrees to compute
        :param active_dims:
        :param kwargs:
        """
        super(NewtonGirardAdditiveKernel, self).__init__(active_dims=active_dims, **kwargs)

        self.base_kernel = base_kernel
        self.num_dims = num_dims
        if max_degree is None:
            self.max_degree = self.num_dims
        elif max_degree > self.num_dims:  # force cap on max_degree (silently)
            self.max_degree = self.num_dims
        else:
            self.max_degree = max_degree

        self.register_parameter(
            name="raw_outputscale", parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, self.max_degree))
        )
        outputscale_constraint = Positive()
        self.register_constraint("raw_outputscale", outputscale_constraint)
        self.outputscale_constraint = outputscale_constraint
        self.outputscale = [1 / self.max_degree for _ in range(self.max_degree)]

    @property
    def outputscale(self):
        return self.raw_outputscale_constraint.transform(self.raw_outputscale)

    @outputscale.setter
    def outputscale(self, value):
        self._set_outputscale(value)

    def _set_outputscale(self, value):
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_outputscale)

        self.initialize(raw_outputscale=self.outputscale_constraint.inverse_transform(value))

    def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params):
        """Forward proceeds by Newton-Girard formulae"""
        if last_dim_is_batch:
            raise RuntimeError("NewtonGirardAdditiveKernel does not accept the last_dim_is_batch argument.")

        # NOTE: comments about shape are only correct for the single-batch cases.
        # kern_values is just the order-1 terms
        # kern_values = D x n x n unless diag=True
        kern_values = to_dense(self.base_kernel(x1, x2, diag=diag, last_dim_is_batch=True, **params))

        # last dim is batch, which gets moved up to pos. 


        kernel_dim = -3 if not diag else -2

        shape = [1 for _ in range(len(kern_values.shape) + 1)]
      
        shape[kernel_dim - 1] = -1
        kvals = torch.arange(1, self.max_degree + 1, device=kern_values.device).reshape(*shape)
        # kvals = R x 1 x 1 x 1 (these are indexes only)

        # e_n = torch.ones(self.max_degree+1, *kern_values.shape[1:], device=kern_values.device)  # includes 0
        # e_n: elementary symmetric polynomial of degree n (e.g. z1 z2 + z1 z3 + z2 z3)
        # e_n is R x n x n, and the array is properly 0 indexed.
        shape = [d_ for d_ in kern_values.shape]
        shape[kernel_dim] = self.max_degree + 1
        e_n = torch.empty(*shape, device=kern_values.device)
        if kernel_dim == -3:
            e_n[..., 0, :, :] = 1.0
        else:
            e_n[..., 0, :] = 1.0

        # power sums s_k (e.g. sum_i^num_dims z_i^k
        # s_k is R x n x n
        s_k = kern_values.unsqueeze(kernel_dim - 1).pow(kvals).sum(dim=kernel_dim)

        # just the constant -1
        m1 = torch.tensor([-1], dtype=torch.float, device=kern_values.device)

        shape = [1 for _ in range(len(kern_values.shape))]
        shape[kernel_dim] = -1
        for deg in range(1, self.max_degree + 1):  # deg goes from 1 to R (it's 1-indexed!)
            # we avg over k [1, ..., deg] (-1)^(k-1)e_{deg-k} s_{k}

            ks = torch.arange(1, deg + 1, device=kern_values.device, dtype=torch.float).reshape(*shape)  # use for pow
            kslong = torch.arange(1, deg + 1, device=kern_values.device, dtype=torch.long)  # use for indexing

            # note that s_k is 0-indexed, so we must subtract 1 from kslong
            sum_ = (
                m1.pow(ks - 1) * e_n.index_select(kernel_dim, deg - kslong) * s_k.index_select(kernel_dim, kslong - 1)
            ).sum(dim=kernel_dim) / deg
            if kernel_dim == -3:
                e_n[..., deg, :, :] = sum_
            else:
                e_n[..., deg, :] = sum_

        if kernel_dim == -3:
            return (self.outputscale.unsqueeze(-1).unsqueeze(-1) * e_n.narrow(kernel_dim, 1, self.max_degree)).sum(
                dim=kernel_dim
            )
        else:
            return (self.outputscale.unsqueeze(-1) * e_n.narrow(kernel_dim, 1, self.max_degree)).sum(dim=kernel_dim)

## the code with GPytorch predictions (newton)

In [75]:
class MyGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ZeroMean()
        dimensionality = train_x.size(-1)  # Assuming last dimension is the dimensionality
        base_kernel = gpytorch.kernels.RBFKernel()  # Or any other base kernel you want to use
        num_dims = dimensionality
        self.covar_module = NewtonGirardAdditiveKernel(base_kernel, num_dims)

    def forward(self, x): 
        mean = self.mean_module(x)
        covar = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean, covar)


# Create the GP model
likelihood = gpytorch.likelihoods.GaussianLikelihood()

model = MyGP(train_x, y_train.squeeze(-1), likelihood)
model.eval()
with torch.no_grad():
    untrained_pred_dist = likelihood(model(test_x))
    predictive_mean = untrained_pred_dist.mean
    lower, upper = untrained_pred_dist.confidence_region()
# Set up optimizer and marginal log likelihood
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# print(output.mean.shape)  # Check the shape of the mean
# print(output.variance.shape)  # Check the shape of the variance
# print(y_train.squeeze(-1).shape)

# print(y_train.size())
model.train()
likelihood.train()
# Training loop
training_iter = 1000
for i in range(training_iter):
    optimizer.zero_grad()
    output = model(train_x)
    # print(output)
    loss = -mll(output, y_train)
    loss = loss.mean() 
    loss.backward()
    # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
    #     i + 1, training_iter, loss.item(),
    #     model.covar_module.base_kernel.lengthscale.item(),
    #     model.likelihood.noise.item()
    # ))
    optimizer.step()

model.eval()
with torch.no_grad():
    trained_pred_dist = likelihood(model(test_x))
    predictive_mean = trained_pred_dist.mean
    lower, upper = trained_pred_dist.confidence_region()
# Viewing model parameters after training
for param_name, param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.data}')


#evaluating there is a problem when the test_y and test_x have float numbers
with torch.no_grad():
    model.eval()  # Set the model to evaluation mode (mode is for computing predictions through the model posterior.)
    likelihood.eval()
    output = likelihood(model(test_x))  # Make predictions on new data 
    


for constraint_name, constraint in model.named_constraints():
    print(f'Constraint name: {constraint_name:55} constraint = {constraint}')


# Extracting means and standard deviations
predicted_means = output.mean.numpy() 
predicted_stddevs = output.stddev.numpy()  # Extract standard deviations

print("Predicted Means:")
print(predicted_means)

print("Predicted Standard Deviations:")
print(predicted_stddevs)




Parameter name: likelihood.noise_covar.raw_noise           value = tensor([5.1058])
Parameter name: covar_module.raw_outputscale               value = tensor([13.0054,  3.5289,  0.9505,  1.1713,  1.5054,  1.8005])
Parameter name: covar_module.base_kernel.raw_lengthscale   value = tensor([[-1.3867]])
Constraint name: likelihood.noise_covar.raw_noise_constraint             constraint = GreaterThan(1.000E-04)
Constraint name: covar_module.raw_outputscale_constraint                 constraint = Positive()
Constraint name: covar_module.base_kernel.raw_lengthscale_constraint     constraint = Positive()
Predicted Means:
[48.73434   38.872406  44.027412  36.111267  28.714663  48.152573
 45.783546  45.607086  14.170515  58.75998   24.513943  32.307228
 29.93573   14.751556  32.472305  24.473934  38.846596  50.589516
 23.799442  39.33092    9.840057  24.759457  49.933304  45.647102
 14.8702545 33.56215   16.175327  46.18608   46.306427  40.907146
 18.738731  30.415497  35.968544  26.347605  47.3

In [76]:
from sklearn.metrics import mean_absolute_error,mean_squared_error, mean_absolute_percentage_error 
mae = mean_absolute_error(y_true=y_test, 
                          y_pred=predicted_means) 
print("Mean Absolute Error", mae) 

mse = mean_squared_error(y_true=y_test, 
                          y_pred=predicted_means) 
print("Mean Square Error", mse)


rmse = mean_squared_error(y_true=y_test, 
                          y_pred=predicted_means,
                          squared=False) 
print("Root Mean Square Error", rmse) 

Mean Absolute Error 4.239747
Mean Square Error 39.729267
Root Mean Square Error 6.303116


## the math formula (newton)

In [77]:
# Viewing model parameters after training
for param_name, param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.data}')

# Ensure model is in evaluation mode
model.eval()

likelihood.eval()
with torch.no_grad():
    #this is the training kernel matrix (K(X,X))
    t_k_matrix = model.covar_module(train_x).evaluate()
    #this is the noise matrix
    n_matrix = torch.eye(t_k_matrix.size(-1)).to(t_k_matrix.device) * likelihood.noise_covar.raw_noise
    # This is (K + sigma^2_n * I)^(-1)
    K_inv = torch.inverse(t_k_matrix + n_matrix)
    # alpha it the K_inv * y
    alpha = torch.matmul(K_inv, model.train_targets.unsqueeze(-1))
    # This is kernel matrix between the test points and training points, K(x*,X)
    K_star = model.covar_module(test_x, train_x).evaluate()
    #This is th ekernel matrix between all pairs of the test points (K(x*,x*))
    K_star_star = model.covar_module(test_x).evaluate()

    # Predictive mean calculation (mu_*)
    pred_mean = torch.matmul(K_star, alpha)
    # Predictive variance calculation (sigma^2_*)
    pred_covar = K_star_star - torch.matmul(K_star, torch.matmul(K_inv, K_star.transpose(-1, -2)))

    # Extract standard deviations from the covariance matrix
    pred_stddev = torch.sqrt(torch.diag(pred_covar))

# You can now print or return the predicted means and standard deviations
print("Predicted Means:", pred_mean.squeeze(-1))
print("Predicted Standard Deviations:", pred_stddev)
print('likelihood', likelihood.noise)

Parameter name: likelihood.noise_covar.raw_noise           value = tensor([5.1058])
Parameter name: covar_module.raw_outputscale               value = tensor([13.0054,  3.5289,  0.9505,  1.1713,  1.5054,  1.8005])
Parameter name: covar_module.base_kernel.raw_lengthscale   value = tensor([[-1.3867]])
Predicted Means: tensor([48.7340, 38.8721, 44.0266, 36.1102, 28.7152, 48.1524, 45.7826, 45.6059,
        14.1703, 58.7590, 24.5139, 32.3072, 29.9358, 14.7514, 32.4720, 24.4727,
        38.8458, 50.5899, 23.7994, 39.3306,  9.8399, 24.7585, 49.9329, 45.6467,
        14.8698, 33.5613, 16.1754, 46.1854, 46.3066, 40.9064, 18.7390, 30.4153,
        35.9683, 26.3478, 47.3605, 35.0029, 53.5174, 15.5136, 44.3811, 42.8791,
        46.8518, 43.6434, 42.5226, 35.6895, 38.0978, 46.4193, 34.6776, 17.2684,
        47.5913, 42.4224, 46.4023, 54.4327, 37.2544, 34.4825, 37.7829, 16.1016,
        35.5342, 26.2869, 27.1752, 49.9869, 25.5816, 24.8923, 16.1016, 12.6942,
        19.6203, 25.7337, 22.3084, 37.3252

In [78]:

from sklearn.metrics import mean_absolute_error,mean_squared_error
mae = mean_absolute_error(y_true=y_test, 
                          y_pred=pred_mean) 
print("Mean Absolute Error", mae) 

mse = mean_squared_error(y_true=y_test, 
                          y_pred=pred_mean) 
print("Mean Square Error", mse)


rmse = mean_squared_error(y_true=y_test, 
                          y_pred=pred_mean,
                          squared=False) 
print("Root Mean Square Error", rmse) 

Mean Absolute Error 4.239864
Mean Square Error 39.73109
Root Mean Square Error 6.3032603


In [79]:
init_mse = gpytorch.metrics.mean_squared_error(untrained_pred_dist, y_test, squared=True)
final_mse = gpytorch.metrics.mean_squared_error(trained_pred_dist, y_test, squared=True)

print(f'Untrained model MSE: {init_mse:.2f}, \nTrained model MSE: {final_mse:.2f}')

init_mae = gpytorch.metrics.mean_absolute_error(untrained_pred_dist, y_test)
final_mae = gpytorch.metrics.mean_absolute_error(trained_pred_dist, y_test)

print(f'Untrained model MAE: {init_mae:.2f}, \nTrained model MAE: {final_mae:.2f}')

Untrained model MSE: 45.04, 
Trained model MSE: 39.73
Untrained model MAE: 4.65, 
Trained model MAE: 4.24


# DP approach

## DP kernel function

In [80]:
class DPkernel(gpytorch.kernels.Kernel):
    def __init__(self, base_kernel, num_dims, q_additivity, **kwargs):
        super().__init__(**kwargs)
        self.base_kernel = base_kernel
        self.num_dims = num_dims
        self.q_additivity = q_additivity
        self.register_parameter(
            name="raw_outputscale", 
            parameter=torch.nn.Parameter(torch.zeros(1, self.q_additivity))
        )
        self.outputscale_constraint = gpytorch.constraints.Positive()
        self.register_constraint("raw_outputscale", self.outputscale_constraint)

    @property
    def outputscale(self):
        return self.outputscale_constraint.transform(self.raw_outputscale).squeeze()

    @outputscale.setter
    def outputscale(self, value):
        if not torch.is_tensor(value):
            value = torch.tensor(value, device=self.raw_outputscale.device)
        self.initialize(raw_outputscale=self.outputscale_constraint.inverse_transform(value))

    def forward(self, x1, x2, diag=False, **params):
    # Determine sizes based on input matrices
        x1_size = x1.size(0)
        x2_size = x2.size(0)
        
        # Initialize matrices based on input sizes
        result = torch.zeros(x1_size, x2_size, device=x1.device) #initialize the result matrix
        sum_order_b = torch.zeros(x1_size, x2_size, device=x1.device) # initialize the matrix for the matrix for a single order
        kernels =[] # list were the z1, z2,... would be stored

        # print(f"Initial x1 shape: {x1.shape}, x2 shape: {x2.shape}")
        
        #calculations for first order
        #calcualte the kernels for each dimentions
        for d in range(self.num_dims):
            x1_d = x1[:, d:d+1]
            x2_d = x2[:, d:d+1]
            k_d = self.base_kernel(x1_d, x2_d).evaluate()
            kernels.append(k_d) #save them in order in the kernels list
            # print(f"Kernel k_d at dim {d} shape: {k_d.shape}, sum_order_b shape: {sum_order_b.shape}")

            sum_order_b += k_d # add each one dimension kernels to one matrix for first order

        result += sum_order_b * self.outputscale[0] #add the first order kernel miltiplied by first outputscale

        # Compute higher order interactions
        for i in range(1, self.q_additivity):
            temp_sum = torch.zeros(x1_size, x2_size, device=x1.device)
            new_kernels = []
            for j in range(self.num_dims):
                for k in range(j + 1, self.num_dims):
                    new_kernel = kernels[j] * kernels[k]
                    new_kernels.append(new_kernel)
                    temp_sum += new_kernel

            kernels = new_kernels  # update kernels list with new order interactions
            result += temp_sum * self.outputscale[i]

        return result 

## the code with GPytorch predictions DP


In [81]:
# Example usage in a GP model
class MyGP(gpytorch.models.ExactGP): # i need to find a diferent model
    def __init__(self, train_x, train_y, likelihood):
        super(MyGP, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ZeroMean()
        self.base_kernel = gpytorch.kernels.RBFKernel()
        self.covar_module = DPkernel(base_kernel=self.base_kernel, num_dims=train_x.size(-1), q_additivity=train_x.size(-1))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)  # Make sure to pass x twice WHY
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# Create the GP model
likelihood = gpytorch.likelihoods.GaussianLikelihood()


model = MyGP(train_x, y_train.squeeze(-1), likelihood)
model.eval()
with torch.no_grad():
    untrained_pred_dist = likelihood(model(test_x))
    predictive_mean = untrained_pred_dist.mean
    lower, upper = untrained_pred_dist.confidence_region()
# Set up optimizer and marginal log likelihood
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

model.train()
likelihood.train()
# Training loop
training_iter = 1000
for i in range(training_iter):
    optimizer.zero_grad()
    output = model(train_x)
    # print(output)
    loss = -mll(output, y_train)
    loss = loss.mean() 
    loss.backward()
    # print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
    #     i + 1, training_iter, loss.item(),
    #     model.covar_module.base_kernel.lengthscale.item(),
    #     model.likelihood.noise.item()
    # ))
    optimizer.step()
# print('likelihood noise', likelihood.noise)
# print('likelihood noise raw', likelihood.noise_covar.raw_noise)
model.eval()
with torch.no_grad():
    trained_pred_dist = likelihood(model(test_x))
    predictive_mean = trained_pred_dist.mean
    lower, upper = trained_pred_dist.confidence_region()
# Viewing model parameters after training
for param_name, param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.data}')


#evaluating there is a problem when the test_y and test_x have float numbers
with torch.no_grad():
    model.eval()  # Set the model to evaluation mode (mode is for computing predictions through the model posterior.)
    likelihood.eval()
    output = likelihood(model(test_x))  # Make predictions on new data 
    


for constraint_name, constraint in model.named_constraints():
    print(f'Constraint name: {constraint_name:55} constraint = {constraint}')


# Extracting means and standard deviations
predicted_means = output.mean.numpy() 
predicted_stddevs = output.stddev.numpy()  # Extract standard deviations

print("Predicted Means:")
print(predicted_means)

print("Predicted Standard Deviations:")
print(predicted_stddevs)




Parameter name: likelihood.noise_covar.raw_noise           value = tensor([10.7944])
Parameter name: base_kernel.raw_lengthscale                value = tensor([[-0.2703]])
Parameter name: covar_module.raw_outputscale               value = tensor([[18.9805,  0.4021, -4.8879, -4.8592, -3.6123,  3.6725]])
Constraint name: likelihood.noise_covar.raw_noise_constraint             constraint = GreaterThan(1.000E-04)
Constraint name: base_kernel.raw_lengthscale_constraint                  constraint = Positive()
Constraint name: covar_module.raw_outputscale_constraint                 constraint = Positive()
Predicted Means:
[48.30478   36.26541   44.5175    42.56601   22.602028  46.21573
 45.832596  46.602676  14.5451355 58.374115  24.146881  32.236923
 36.057983  17.502106  33.503197  25.148178  44.687363  47.98326
 24.895126  39.810844   9.809639  26.884705  53.238693  44.00853
 16.462639  35.190857  16.391552  53.190887  40.439262  41.478546
 17.46334   33.521492  37.44832   25.873825  45.1

In [82]:
mae = mean_absolute_error(y_true=y_test, 
                          y_pred=predicted_means) 
print("Mean Absolute Error", mae) 

mse = mean_squared_error(y_true=y_test, 
                          y_pred=predicted_means) 
print("Mean Square Error", mse)


rmse = mean_squared_error(y_true=y_test, 
                          y_pred=predicted_means,
                          squared=False) 
print("Root Mean Square Error", rmse) 

Mean Absolute Error 4.149475
Mean Square Error 38.107735
Root Mean Square Error 6.1731462


## the math formula DP

In [83]:
# Viewing model parameters after training
for param_name, param in model.named_parameters():
    print(f'Parameter name: {param_name:42} value = {param.data}')

# Ensure model is in evaluation mode
model.eval()

likelihood.eval()
with torch.no_grad():
    #this is the training kernel matrix (K(X,X))
    t_k_matrix = model.covar_module(train_x).evaluate()
    #this is the noise matrix
    n_matrix = torch.eye(t_k_matrix.size(-1)).to(t_k_matrix.device) * likelihood.noise_covar.raw_noise
    # This is (K + sigma^2_n * I)^(-1)
    K_inv = torch.inverse(t_k_matrix + n_matrix)
    # alpha it the K_inv * y
    alpha = torch.matmul(K_inv, model.train_targets.unsqueeze(-1))
    # This is kernel matrix between the test points and training points, K(x*,X)
    K_star = model.covar_module(test_x, train_x).evaluate()
    #This is th ekernel matrix between all pairs of the test points (K(x*,x*))
    K_star_star = model.covar_module(test_x).evaluate()

    # Predictive mean calculation (mu_*)
    pred_mean = torch.matmul(K_star, alpha)
    # Predictive variance calculation (sigma^2_*)
    pred_covar = K_star_star - torch.matmul(K_star, torch.matmul(K_inv, K_star.transpose(-1, -2)))

    # Extract standard deviations from the covariance matrix
    pred_stddev = torch.sqrt(torch.diag(pred_covar))

# You can now print or return the predicted means and standard deviations
print("Predicted Means:", pred_mean.squeeze(-1))
print("Predicted Standard Deviations:", pred_stddev)
print('likelihood', likelihood.noise)

Parameter name: likelihood.noise_covar.raw_noise           value = tensor([10.7944])
Parameter name: base_kernel.raw_lengthscale                value = tensor([[-0.2703]])
Parameter name: covar_module.raw_outputscale               value = tensor([[18.9805,  0.4021, -4.8879, -4.8592, -3.6123,  3.6725]])
Predicted Means: tensor([48.3056, 36.2659, 44.5181, 42.5665, 22.6022, 46.2165, 45.8330, 46.6031,
        14.5454, 58.3750, 24.1471, 32.2372, 36.0581, 17.5024, 33.5036, 25.1483,
        44.6882, 47.9840, 24.8952, 39.8115,  9.8097, 26.8850, 53.2394, 44.0092,
        16.4627, 35.1913, 16.3916, 53.1917, 40.4400, 41.4790, 17.4635, 33.5219,
        37.4490, 25.8741, 45.1173, 38.1199, 55.8328, 15.7393, 44.4786, 45.4272,
        49.4690, 39.7816, 43.6416, 37.1267, 36.4754, 48.9983, 36.7415, 19.1290,
        48.1863, 41.2861, 48.7412, 54.6846, 39.3551, 40.1428, 37.1681, 19.2500,
        35.6286, 29.3131, 22.4671, 47.5451, 27.1948, 25.8696, 19.2500, 12.5314,
        19.9850, 26.2668, 24.0178, 39.2

In [84]:
mae = mean_absolute_error(y_true=y_test, 
                          y_pred=pred_mean) 
print("Mean Absolute Error", mae) 

mse = mean_squared_error(y_true=y_test, 
                          y_pred=pred_mean) 
print("Mean Square Error", mse)


rmse = mean_squared_error(y_true=y_test, 
                          y_pred=pred_mean,
                          squared=False) 
print("Root Mean Square Error", rmse) 

Mean Absolute Error 4.149397
Mean Square Error 38.10757
Root Mean Square Error 6.173133


In [85]:

init_mse = gpytorch.metrics.mean_squared_error(untrained_pred_dist, y_test, squared=True)
final_mse = gpytorch.metrics.mean_squared_error(trained_pred_dist, y_test, squared=True)

print(f'Untrained model MSE: {init_mse:.2f}, \nTrained model MSE: {final_mse:.2f}')

init_mae = gpytorch.metrics.mean_absolute_error(untrained_pred_dist, y_test)
final_mae = gpytorch.metrics.mean_absolute_error(trained_pred_dist, y_test)

print(f'Untrained model MAE: {init_mae:.2f}, \nTrained model MAE: {final_mae:.2f}')

Untrained model MSE: 46.52, 
Trained model MSE: 38.11
Untrained model MAE: 4.51, 
Trained model MAE: 4.15
