In [8]:
import numpy as np
import pandas as pd
import tensorflow as tf
import gpflow
from gpflow.utilities import set_trainable
from gpflow.kernels import SquaredExponential, White
from gpflow.models import GPMC
from gpflow.likelihoods import Gaussian
from gpflow.kernels import SeparateIndependent, SharedIndependent
# from gpflow.likelihoods import SeparateIndependentGaussian
import matplotlib.pyplot as plt

# Load and prepare your data
data = pd.read_csv('train_test.csv', header=None, names=['timestamp', 'tether_x', 'tether_y', 'tether_z', 'drone_x', 'drone_y', 'drone_z', 'platform_azimuth', 'platform_elevation', 'drone_elevation', 'drone_azimuth', 'drone_yaw', 'length'])

# Inputs (X)
# We need to choose the inputs that are most relevant.
# The `tether_x,y,z` are already a function of the other parameters.
# A good starting point is to use the raw sensor data that generates the tether position.
X_inputs = data[['platform_azimuth', 'platform_elevation', 'drone_elevation', 'length']].values

# It's often good practice to normalize the input data to have zero mean and unit variance
# This helps with the optimization process.
from sklearn.preprocessing import StandardScaler
scaler_x = StandardScaler()
X_norm = scaler_x.fit_transform(X_inputs)

# Outputs (Y)
y = data[['drone_x', 'drone_y', 'drone_z']].values - data[['tether_x', 'tether_y', 'tether_z']].values

# For multi-output GPs, y should have shape [N, D], where N is number of data points and D is number of outputs.
# Your current `y` is already in this format, which is perfect.

# Convert to TensorFlow tensors with the right data type
X = tf.convert_to_tensor(X_norm, dtype=tf.float64)
Y = tf.convert_to_tensor(y, dtype=tf.float64)

# Split data into training and testing sets (optional but good practice)
# Let's use a simple split
N_train = int(0.8 * len(X))
X_train, Y_train = X[:N_train], Y[:N_train]
X_test, Y_test = X[N_train:], Y[N_train:]

print(f"X_train shape: {X_train.shape}")
print(f"Y_train shape: {Y_train.shape}")

X_train shape: (67056, 4)
Y_train shape: (67056, 3)


In [13]:
# Number of inputs and outputs
num_outputs = Y.shape[1]  # 3 (for x, y, z errors)
input_dim = X.shape[1]   # 4 (for p_azimuth, p_elevation, d_elevation, length)

# A good kernel choice is a combination of a basic kernel (like RBF) and a White kernel for noise.
# We will use a SharedIndependent kernel where one kernel is shared across all outputs.
# This implies that the correlation structure is the same for all outputs, but each output
# can have a different scaling.

# Define the shared kernel (e.g., SquaredExponential)
shared_kernel = gpflow.kernels.SquaredExponential(lengthscales=1.0, variance=1.0)
shared_kernel.lengthscales.trainable = True # Ensure lengthscales are trainable

# Build the multi-output kernel
# `num_outputs` is the number of target variables (3: x, y, z errors)
kernel = gpflow.kernels.SharedIndependent(shared_kernel, output_dim=num_outputs)

# The likelihood will handle the observation noise for each output dimension
likelihood = gpflow.likelihoods.Gaussian(variance=1.0) # We'll need a multi-output likelihood here

# For multi-output, it's often better to use a multi-output likelihood
# This allows for a different observation noise variance for each output dimension.
multi_likelihood = gpflow.likelihoods.SeparateIndependent(num_outputs=num_outputs,
                                                        likelihoods=[gpflow.likelihoods.Gaussian() for _ in range(num_outputs)])

# Build the GP model
# We'll use GPR (Gaussian Process Regression) as it's the most straightforward.
model = gpflow.models.GPR(
    (X_train, Y_train), 
    kernel=kernel, 
    likelihood=multi_likelihood # Using a multi-output likelihood
)

# You can inspect the model's parameters
print("Initial model parameters:")
gpflow.utilities.print_summary(model)

AttributeError: module 'gpflow.likelihoods' has no attribute 'SeparateIndependent'