In [1]:
import torch
import gpytorch
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from gpytorch.mlls import SumMarginalLogLikelihood

In [3]:
DATA_FILEPATH = "/home/hansm/active_learning/Double_pendulum/data/"
file_path_train_inputs = DATA_FILEPATH + 'train_inputs.csv'
df_train_inputs = pd.read_csv(file_path_train_inputs)
train_inputs = df_train_inputs.values

file_path_train_outputs = DATA_FILEPATH + 'train_outputs.csv'
df_train_outputs = pd.read_csv(file_path_train_outputs)
train_outputs = df_train_outputs.values

# Set the number of rows you want to choose
num_rows_to_choose = 1000

# Choose 2000 random indices
random_indices = np.random.choice(train_inputs.shape[0], size=num_rows_to_choose, replace=False)

# Select the corresponding rows from each array
train_inputs = train_inputs[random_indices]
train_outputs = train_outputs[random_indices]

train_inputs = torch.tensor(train_inputs, dtype=torch.float32)

In [4]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [5]:
train_outputs_theta1 = torch.tensor(train_outputs[:, 0], dtype=torch.float32)
train_outputs_theta2 = torch.tensor(train_outputs[:, 1], dtype=torch.float32)
train_outputs_omega1 = torch.tensor(train_outputs[:, 2], dtype=torch.float32)
train_outputs_omega2 = torch.tensor(train_outputs[:, 3], dtype=torch.float32)

In [6]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


likelihood = gpytorch.likelihoods.GaussianLikelihood()

model1 = ExactGPModel(train_inputs, train_outputs_theta1, likelihood)
model2 = ExactGPModel(train_inputs, train_outputs_theta2, likelihood)

model3 = ExactGPModel(train_inputs, train_outputs_omega1, likelihood)
model4 = ExactGPModel(train_inputs, train_outputs_omega2, likelihood)



In [7]:
model = gpytorch.models.IndependentModelList(model1, model2, model3, model4)
likelihood = gpytorch.likelihoods.LikelihoodList(model1.likelihood, model2.likelihood, model3.likelihood, model4.likelihood)

In [8]:
from gpytorch.mlls import SumMarginalLogLikelihood

mll = SumMarginalLogLikelihood(likelihood, model)

In [9]:
training_iterations = 50
# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

for i in range(training_iterations):
    optimizer.zero_grad()
    output = model(*model.train_inputs)
    loss = -mll(output, model.train_targets)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()

Iter 1/50 - Loss: 1.402
Iter 2/50 - Loss: 1.278
Iter 3/50 - Loss: 1.174
Iter 4/50 - Loss: 1.080
Iter 5/50 - Loss: 0.997
Iter 6/50 - Loss: 0.929
Iter 7/50 - Loss: 0.866
Iter 8/50 - Loss: 0.810
Iter 9/50 - Loss: 0.752
Iter 10/50 - Loss: 0.704
Iter 11/50 - Loss: 0.658
Iter 12/50 - Loss: 0.613
Iter 13/50 - Loss: 0.564
Iter 14/50 - Loss: 0.523
Iter 15/50 - Loss: 0.479
Iter 16/50 - Loss: 0.437
Iter 17/50 - Loss: 0.394
Iter 18/50 - Loss: 0.353
Iter 19/50 - Loss: 0.313
Iter 20/50 - Loss: 0.266
Iter 21/50 - Loss: 0.232
Iter 22/50 - Loss: 0.198
Iter 23/50 - Loss: 0.161
Iter 24/50 - Loss: 0.130
Iter 25/50 - Loss: 0.089
Iter 26/50 - Loss: 0.065
Iter 27/50 - Loss: 0.039
Iter 28/50 - Loss: 0.007
Iter 29/50 - Loss: -0.026
Iter 30/50 - Loss: -0.069
Iter 31/50 - Loss: -0.091
Iter 32/50 - Loss: -0.116
Iter 33/50 - Loss: -0.149
Iter 34/50 - Loss: -0.185
Iter 35/50 - Loss: -0.209
Iter 36/50 - Loss: -0.217
Iter 37/50 - Loss: -0.248
Iter 38/50 - Loss: -0.276
Iter 39/50 - Loss: -0.294
Iter 40/50 - Loss: -0.3

In [11]:
file_path_test_inputs = DATA_FILEPATH + 'test_inputs.csv'
df_test_inputs = pd.read_csv(file_path_test_inputs)
test_inputs = df_test_inputs.values

file_path_test_outputs = DATA_FILEPATH + 'test_outputs.csv'
df_test_outputs = pd.read_csv(file_path_test_outputs)
test_outputs = df_test_outputs.values

test_inputs = torch.tensor(test_inputs, dtype=torch.float32)

In [12]:
# Set into eval mode
model.eval()
likelihood.eval()


# Make predictions (use the same test points)
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    # This contains predictions for both outcomes as a list
    predictions = likelihood(*model(test_inputs, test_inputs, test_inputs, test_inputs))
    variance_lst = []
    for prediction in predictions:
        lower, upper = prediction.confidence_region()
        variance = upper.numpy() - lower.numpy()
        variance_lst.append(variance)

In [13]:
print(type(model))

<class 'gpytorch.models.model_list.IndependentModelList'>


In [11]:
print(variance.shape)

(6561,)


In [12]:
pred_theta1 = predictions[0].mean.numpy()
pred_theta2 = predictions[1].mean.numpy()
pred_omega1 = predictions[2].mean.numpy()
pred_omega2 = predictions[3].mean.numpy()
final_prediction = np.column_stack((pred_theta1, pred_theta2, pred_omega1, pred_omega2))
#test_outputs = test_outputs[:, 2:4]

RMSE = np.sqrt(mean_squared_error(test_outputs, final_prediction))

In [13]:
print(RMSE)

0.13050346633320617


In [21]:
print(RMSE)

0.12951670184468683


In [18]:
print(test_outputs.shape)

(6561, 0)
