In [1]:
import torch
import gpytorch
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from gpytorch.mlls import SumMarginalLogLikelihood

In [7]:
DATA_FILEPATH = "/home/hansm/active_learning/Double_pendulum/data/"
file_path_train_inputs = DATA_FILEPATH + 'train_inputs.csv'
df_train_inputs = pd.read_csv(file_path_train_inputs)
train_inputs = df_train_inputs.values

file_path_train_outputs = DATA_FILEPATH + 'train_outputs.csv'
df_train_outputs = pd.read_csv(file_path_train_outputs)
train_outputs = df_train_outputs.values

# Set the number of rows you want to choose
num_rows_to_choose = 1000

# Choose 2000 random indices
random_indices = np.random.choice(train_inputs.shape[0], size=num_rows_to_choose, replace=False)

# Select the corresponding rows from each array
train_inputs = train_inputs[random_indices]
train_outputs = train_outputs[random_indices]

#train_inputs = torch.tensor(train_inputs, dtype=torch.float32)

In [4]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
    
    
def train_gp(training_inputs, training_outputs, training_iterations):
  likelihood = gpytorch.likelihoods.GaussianLikelihood()
  training_inputs = torch.tensor(training_inputs, dtype=torch.float32)
  model_list = []

  for col_index in range(training_outputs.shape[1]):
     training_output_column = torch.tensor(training_outputs[:, col_index], dtype=torch.float32)
     model = ExactGPModel(training_inputs, training_output_column, likelihood)
     model_list.append(model)


  gp_model_list = gpytorch.models.IndependentModelList(model_list[0], model_list[1], model_list[2], model_list[3])
  likelihood_list = gpytorch.likelihoods.LikelihoodList(model_list[0].likelihood, model_list[1].likelihood, model_list[2].likelihood, model_list[3].likelihood)

  mll = SumMarginalLogLikelihood(likelihood_list, gp_model_list)

  # Find optimal model hyperparameters
  gp_model_list.train()
  likelihood_list.train()

  # Use the Adam optimizer
  optimizer = torch.optim.Adam(gp_model_list.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

  for i in range(training_iterations):
      optimizer.zero_grad()
      output = gp_model_list(*gp_model_list.train_inputs)
      loss = -mll(output, gp_model_list.train_targets)
      loss.backward()
      print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
      optimizer.step()

  return likelihood, gp_model_list

def gp_eval(test_inputs, model, likelihood):
  test_inputs = torch.tensor(test_inputs, dtype=torch.float32)
  # Set into eval mode
  model.eval()
  likelihood.eval()


  # Make predictions (use the same test points)
  with torch.no_grad(), gpytorch.settings.fast_pred_var():
      
      # This contains predictions for both outcomes as a list
      predictions = likelihood(*model(test_inputs, test_inputs, test_inputs, test_inputs))
      final_prediction = np.column_stack((predictions[0].mean.numpy(), predictions[1].mean.numpy(), predictions[2].mean.numpy(), predictions[3].mean.numpy()))
      variance_lst = []
      for prediction in predictions:
        lower, upper = prediction.confidence_region()
        variance = upper.numpy() - lower.numpy()
        variance_lst.append(variance)
  return final_prediction, np.column_stack(variance_lst)

In [None]:
gp_model, gp_likelihood = train_gp(train_inputs, train_outputs, 100)

In [10]:
print(type(gp_model))

<class 'gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood'>


In [None]:
# Set into eval mode
gp_model.eval()
gp_likelihood.eval()


# Make predictions (use the same test points)
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    # This contains predictions for both outcomes as a list
    predictions = gp_likelihood(*gp_model(test_inputs, test_inputs, test_inputs, test_inputs))
    variance_lst = []
    for prediction in predictions:
        lower, upper = prediction.confidence_region()
        variance = upper.numpy() - lower.numpy()
        variance_lst.append(variance)

In [None]:
file_path_test_inputs = DATA_FILEPATH + 'test_inputs.csv'
df_test_inputs = pd.read_csv(file_path_test_inputs)
test_inputs = df_test_inputs.values

file_path_test_outputs = DATA_FILEPATH + 'test_outputs.csv'
df_test_outputs = pd.read_csv(file_path_test_outputs)
test_outputs = df_test_outputs.values

test_inputs = torch.tensor(test_inputs, dtype=torch.float32)

In [None]:
prediction, var = gp_eval(test_inputs, gp_model, gp_likelihood)