In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern
from scipy.stats import norm

In [5]:
# Load the initial data
X = np.load('data/initial_inputs.npy')
Y = np.load('data/initial_outputs.npy')

print("X shape:", X.shape)
print("Y shape:", Y.shape)
        
# Define the function to be optimized (assuming the real function is unknown)
def objective_function(x):
    # This function should be replaced with the actual evaluation of the objective.
    # Here it is assumed as a placeholder.
    return np.sin(10 * np.pi * x) / x

# Random Search within a restricted area
def random_search_around_point(point, scale, n_samples=100):
    samples = np.random.normal(point, scale, size=(n_samples, len(point)))
    samples = np.clip(samples, 0, 1)  # Ensure samples are within bounds
    evaluations = np.array([objective_function(sample) for sample in samples])
    max_idx = np.argmax(evaluations)
    return samples[max_idx], evaluations[max_idx]

X shape: (100, 2)
Y shape: (100,)


In [15]:
from sklearn.preprocessing import StandardScaler

# Bayesian Optimization with UCB
def bayesian_optimization(X, Y, n_iterations=10):
    kernel = RBF(length_scale=1.0) + Matern(length_scale=1.0)
    gpr = GaussianProcessRegressor(kernel=kernel)
    
    scaler_X = StandardScaler()
    scaler_Y = StandardScaler()
    
    X_scaled = scaler_X.fit_transform(X)
    Y_scaled = scaler_Y.fit_transform(Y.reshape(-1, 1)).ravel()

    for iteration in range(n_iterations):
        gpr.fit(X_scaled, Y_scaled)

        # Create the grid
        x_grid = np.linspace(0, 1, 100)
        X_grid = np.array([[x1, x2] for x1 in x_grid for x2 in x_grid])
        
        # Generate a prediction grid using the scaled X values
        X_pred_grid = scaler_X.transform(X_grid)
        mean, std = gpr.predict(X_pred_grid, return_std=True)

        # Calculate UCB on the scaled prediction grid
        ucb = mean + 1.96 * std
        next_idx = np.argmax(ucb)

        # Get the next query point in the original scale
        next_point_original_scale = X_grid[next_idx]
        next_eval = objective_function(next_point_original_scale)

        # Update the original data with the new sample
        X = np.vstack([X, next_point_original_scale])
        Y = np.append(Y, next_eval)

        # Rescale the updated data for the next iteration
        X_scaled = scaler_X.transform(X)
        Y_scaled = scaler_Y.fit_transform(Y.reshape(-1, 1)).ravel()

        # Debugging print statements
        print(f"Iteration {iteration}:")
        print("X shape:", X.shape)
        print("Y shape:", Y.shape)
        
    # Correctly indented return statement
    return X, Y


In [16]:

# Perform Random Search as a baseline
next_query = np.random.uniform(size=2)
print("Next query Random Search:", next_query)

Next query Random Search: [0.89102626 0.0446369 ]


In [17]:


# Perform Bayesian Optimization
X_opt, Y_opt = bayesian_optimization(X, Y)
best_idx = np.argmax(Y_opt)
print("Best point Bayesian Optimization:", X_opt[best_idx])

Iteration 0:
X shape: (101, 2)
Y shape: (102,)




ValueError: Found input variables with inconsistent numbers of samples: [101, 102]

In [4]:


# Visualize the initial data
plt.figure(figsize=(8, 5))
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap='viridis')
plt.colorbar(label='Function value')
plt.xlabel('x1')
plt.ylabel('x2')
plt.title('Initial Data Scatter Plot')
plt.show()

# Run random search around a hint point
hint_point = np.array([0.6262626262626263, 0.6767676767676768])
next_query_random, eval_random = random_search_around_point(hint_point, scale=0.01)
print("Next query Random Search around hint point:", next_query_random)

# Visualize the Bayesian Optimization process
plt.figure(figsize=(8, 5))
plt.scatter(X_opt[:, 0], X_opt[:, 1], c=Y_opt, cmap='viridis')
plt.colorbar(label='Function value')
plt.xlabel('x1')
plt.ylabel('x2')
plt.title('Bayesian Optimization Points')
plt.show()


Next query Random Search: [0.38249774 0.92931143]
Iteration 0:
X shape: (101, 2)
Y shape: (102,)


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


ValueError: Found input variables with inconsistent numbers of samples: [101, 102]