In [10]:
import torch
import numpy as np
from scipy import special
import matplotlib.pyplot as plt
import sklearn
from sklearn.gaussian_process.kernels import DotProduct, RBF, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.datasets import make_friedman2
from sklearn.model_selection import train_test_split

# Set random seed for reproducibility
np.random.seed(1995)
torch.manual_seed(1995)

# Generate 2D data (Method 1: Adding a second feature based on sin(x_obs))
x_obs = np.array(np.linspace(-50, 50, 1000)).reshape(-1, 1)
x_obs_2d = np.hstack([x_obs, np.sin(x_obs)])  # Adding sin(x_obs) as second feature

# You can also use Method 2: Uncomment the following line to use random 2D input
# x_obs_2d = np.random.uniform(low=-50, high=50, size=(1000, 2))

# Gaussian Process with DotProduct and RBF kernel
kernel = DotProduct(sigma_0=20) * RBF(length_scale=5.0) + WhiteKernel(1)
gp = GaussianProcessRegressor(kernel=kernel, random_state=0)

# Generate noisy observations
noise = 10 * np.random.randn(x_obs_2d.shape[0])
y_obs = gp.sample_y(x_obs_2d, n_samples=1, random_state=0).reshape(-1, 1) + noise.reshape(-1, 1)

# Normalize y_obs (optional)
y_obs = y_obs / 10

# Split data into training and test sets with 500 test points
x_train, x_test_unsorted, y_train, y_test_unsorted = train_test_split(x_obs_2d, y_obs, test_size=1000, random_state=42)

# Get the indices that would sort x_test by the first feature (for visualization purposes)
sorted_indices = np.argsort(x_test_unsorted[:, 0])

# Sort both arrays using the sorted indices
x_test = x_test_unsorted[sorted_indices]
y_test = y_test_unsorted[sorted_indices]

# Now you have a 2D input dataset (x_obs_2d) and corresponding outputs (y_obs)
print(f"x_train shape: {x_train.shape}")
print(f"x_test shape: {x_test.shape}")

# data = np.vstack((x_obs.transpose(), y_obs.T))
# train_set, val_set = torch.utils.data.random_split(data.T, [500,500])


# Create plot
fig, ax = plt.subplots(figsize=(10, 5))
#ax.plot(x_true, y_true, 'b-', linewidth=3, label="True function")
ax.scatter(x_obs, y_obs, color='C0', marker='o',label="Observations")
#ax.set_xlim(xlims)
#ax.set_ylim(ylims)
ax.set_xlabel("X", fontsize=30)
ax.set_ylabel("Y", fontsize=30)
ax.legend(loc=4, fontsize=15, frameon=False)

plt.show()

ValueError: test_size=1000 should be either positive and smaller than the number of samples 1000 or a float in the (0, 1) range