# Setup

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from scipy.optimize import minimize
from utils import load_data

warnings.filterwarnings("ignore")
np.random.seed(44) # New seed for Week 4

print("Ready for Neural Surrogate Optimization")

Ready for Neural Surrogate Optimization


# Neural Gradient Ascent Strategy

In [11]:
def suggest_next_point_neural_gradient(func_id, X_train, y_train):
    print(f"--- Optimizing Function {func_id} ---")

    # 1. Preprocessing (Critical for NNs)
    # Inputs must be scaled (standard scaling usually better for gradients than MinMax)
    scaler_x = StandardScaler()
    X_scaled = scaler_x.fit_transform(X_train)

    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()

    # 2. Train Surrogate Models

    # Model A: Neural Network (The "Gradient Climber")
    # Using L-BFGS because it works much better on small datasets than Adam
    # Tanh activation gives smooth gradients (unlike ReLU which has dead zones)
    nn_model = MLPRegressor(hidden_layer_sizes=(32, 32),
                            activation='tanh',
                            solver='lbfgs',
                            max_iter=2000,
                            random_state=42)
    nn_model.fit(X_scaled, y_scaled)

    # Model B: Gaussian Process (The "Safety Net")
    kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel(noise_level=0.1)
    gp_model = GaussianProcessRegressor(kernel=kernel, normalize_y=False)
    gp_model.fit(X_scaled, y_scaled)

    # 3. Define the Acquisition / Objective Function
    # We want to find x that maximizes (NN_pred + GP_pred)
    # Since scipy.minimize minimizes, we return the negative sum.

    def objective_function(x):
        # x comes in as 1D array. Reshape for sklearn.
        x_reshaped = x.reshape(1, -1)

        # Predict with NN
        nn_pred = nn_model.predict(x_reshaped)

        # Predict with GP
        gp_pred = gp_model.predict(x_reshaped)

        # Weighted Ensemble:
        # Func 8 (High dim) -> Trust GP more (NN overfits easily in 8D with 13 points)
        # Func 2, 5 (Strong signals) -> Trust NN gradients more
        if func_id == 8:
            combined = 0.3 * nn_pred + 0.7 * gp_pred
        else:
            combined = 0.6 * nn_pred + 0.4 * gp_pred

        return -combined[0] # Negative because we want to MAXIMIZE

    # 4. Gradient-Based Optimization (The "Steering")
    # Instead of random sampling, we start from the BEST point we've seen so far
    # and "climb the hill" using the gradients of our surrogate models.

    best_idx = np.argmax(y_train)
    x_start_original = X_train[best_idx]
    x_start_scaled = scaler_x.transform(x_start_original.reshape(1, -1)).flatten()

    # Bounds in scaled space
    # We need to approximate bounds since scaler shifts them.
    # Let's just run optimization and clip later to be safe, or use approximate bounds.
    # L-BFGS-B allows bounds.

    # Run Optimizer
    res = minimize(fun=objective_function,
                   x0=x_start_scaled,
                   method='L-BFGS-B',
                   options={'maxiter': 100, 'eps': 1e-5})

    # 5. Inverse Transform to get real X
    x_optimized_scaled = res.x.reshape(1, -1)
    next_point = scaler_x.inverse_transform(x_optimized_scaled).flatten()

    # Clip to ensure we stay in [0, 1] box
    next_point = np.clip(next_point, 0.0, 1.0)

    # Exploration Jitter
    # If the optimizer got stuck exactly at the start point (gradient is 0), add noise
    if np.linalg.norm(next_point - x_start_original) < 1e-6:
        print("   Gradient stuck, adding exploration jitter.")
        next_point += np.random.normal(0, 0.1, size=next_point.shape)
        next_point = np.clip(next_point, 0.0, 1.0)

    return next_point

# Run and Format

In [13]:
submission_queries = {}
print(f"{'Func':<5} | {'Optimizing...'}")
print("-" * 30)

for func_id in range(1, 9):
    X_known, y_known = load_data(func_id)
    next_x = suggest_next_point_neural_gradient(func_id, X_known, y_known)
    submission_queries[func_id] = next_x

print("\n" + "="*30)
print("FORMATTED SUBMISSION OUTPUT")
print("="*30)

for func_id, x_val in submission_queries.items():
    formatted_str = " - ".join([f"{val:.6f}" for val in x_val])
    print(f"function_number: {func_id}: {formatted_str}")

Func  | Optimizing...
------------------------------
--- Optimizing Function 1 ---
--- Optimizing Function 2 ---
--- Optimizing Function 3 ---
--- Optimizing Function 4 ---
--- Optimizing Function 5 ---
--- Optimizing Function 6 ---
--- Optimizing Function 7 ---
--- Optimizing Function 8 ---

FORMATTED SUBMISSION OUTPUT
function_number: 1: 0.000000-1.000000
function_number: 2: 0.770326-1.000000
function_number: 3: 0.381485-0.407667-0.447654
function_number: 4: 0.400327-0.416657-0.457421-0.423750
function_number: 5: 1.000000-1.000000-1.000000-1.000000
function_number: 6: 0.286724-0.288061-0.514199-0.662913-0.053490
function_number: 7: 0.000000-0.326950-0.413602-0.271175-0.421867-0.538347
function_number: 8: 0.000000-0.363803-0.000000-0.330744-0.715120-0.767185-0.190904-0.626834
