# Week 7: Hyperparameter Tuning & Adaptive Surrogates

## Strategy: Tuning the Tuner
With 15 data points, using a fixed Neural Network architecture for all 8 functions is suboptimal. 
In this round, we implement **Per-Function Hyperparameter Tuning**:
1. **Meta-Optimization**: For each function, we run a `RandomizedSearchCV` to find the `hidden_layer_sizes`, `alpha` (regularization), and `activation` that minimize the cross-validation error on the 15 known points.
2. **Adaptive Complexity**: This allows the strategy to automatically choose shallow networks for simple functions and deep networks for complex ones.
3. **Tuned Trust Regions**: We then use the *optimized* model within our Trust Region framework to suggest the next query.

In [13]:
import numpy as np
import warnings
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from scipy.optimize import minimize
from scipy.stats import loguniform
import sys
import os

# Ensure we can import from src
# The path is resolved relative to the current working directory, so this
# only works when the notebook is run from a directory one level below the
# project root.
sys.path.append(os.path.abspath('..'))
from src.utils import load_data

# Silence every warning category for the rest of the session.
# NOTE(review): this presumably also hides optimizer convergence warnings
# from the models fitted below -- confirm that is intended.
warnings.filterwarnings("ignore")
# Seeds NumPy's global RNG; anything drawing from np.random.* afterwards is
# reproducible only if the cells are executed in the same order.
np.random.seed(47) # Week 7 Seed

print("Ready for Hyperparameter Tuning")

Ready for Hyperparameter Tuning


In [14]:
def tune_surrogate_model(X, y, *, n_iter=15, cv=3, random_state=42):
    """
    Performs RandomizedSearchCV to find the best MLP hyperparameters for the specific function.

    Candidate settings are drawn from a fixed grid of hidden-layer layouts,
    L2 penalties (``alpha``) and activations, and are ranked by negative
    mean squared error under cross-validation.

    Args:
        X: Training inputs, one row per observation.
        y: Training targets, one value per row of ``X``.
        n_iter: Number of random hyperparameter combinations to evaluate.
        cv: Number of cross-validation folds (or any splitter accepted by
            ``RandomizedSearchCV``). An integer larger than the number of
            samples is reduced to the number of samples so that very small
            data sets do not make the split fail.
        random_state: Seed used both for the base MLP and for sampling the
            hyperparameter combinations.

    Returns:
        dict: ``search.best_params_`` -- the winning ``hidden_layer_sizes``,
        ``alpha`` and ``activation``.
    """
    # Define Hyperparameter Space
    param_dist = {
        'hidden_layer_sizes': [(32,), (64,), (32, 32), (64, 32), (128, 64)],
        'alpha': [0.0001, 0.001, 0.01, 0.1],  # Regularization strength
        'activation': ['tanh', 'relu'],
    }

    # A k-fold split cannot have more folds than samples; cap integer fold
    # counts so tiny data sets degrade to leave-one-out instead of raising.
    if isinstance(cv, int):
        cv = min(cv, len(X))

    # Base model
    mlp = MLPRegressor(solver='lbfgs', max_iter=2000, random_state=random_state)

    # Randomized search: `n_iter` sampled combinations, each scored with
    # `cv`-fold cross-validation (3-fold by default as a cheap stand-in for
    # leave-one-out on ~15 points).
    search = RandomizedSearchCV(mlp, param_dist, n_iter=n_iter, cv=cv,
                                scoring='neg_mean_squared_error', n_jobs=-1,
                                random_state=random_state)

    search.fit(X, y)

    print(f"   Best Params: {search.best_params_}")
    return search.best_params_

In [15]:
def suggest_next_point_tuned(func_id, X_train, y_train, *, radius=0.2, kappa=1.96):
    """
    Suggest the next query point for one function using a tuned NN/GP surrogate.

    Pipeline:
      1. Standardise inputs and targets.
      2. Tune the MLP hyperparameters on the standardised data.
      3. Fit a 3-member MLP ensemble (different seeds) plus a Matern GP.
      4. Build a box-shaped trust region around the best observed point.
      5. Minimise ``-(UCB) + repulsion penalty`` inside that box with
         L-BFGS-B, starting from a randomly perturbed copy of the best point.

    Args:
        func_id: Identifier used only in the progress message.
        X_train: Observed inputs, shape (n_samples, n_dims); the trust region
            is clamped to the unit box [0, 1] in every dimension.
        y_train: Observed outputs, one per row of ``X_train`` (maximised).
        radius: Half-width of the trust region in original (unscaled) units.
        kappa: Exploration weight of the UCB acquisition.

    Returns:
        numpy.ndarray: The suggested point in original units, clipped to
        [0, 1] per dimension.
    """
    print(f"--- Optimizing Function {func_id} (Tuned) ---")

    # Accept lists as well as arrays; later steps rely on ndarray methods.
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    # 1. Preprocessing
    scaler_x = StandardScaler()
    X_scaled = scaler_x.fit_transform(X_train)

    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()

    # 2. Hyperparameter Tuning (The "AutoML" Step)
    # Find the best architecture for THIS function's data
    best_params = tune_surrogate_model(X_scaled, y_scaled)

    # 3. Train Ensemble with Best Params
    # We use the *found* params but retrain 3 times with different seeds for robustness
    nn_ensemble = []
    for seed in (42, 101, 999):
        model = MLPRegressor(solver='lbfgs', max_iter=3000, random_state=seed, **best_params)
        model.fit(X_scaled, y_scaled)
        nn_ensemble.append(model)

    # GP Anchor (Keeping GP fixed as a reliable baseline)
    kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel(noise_level=0.1)
    gp_model = GaussianProcessRegressor(kernel=kernel, normalize_y=False)
    gp_model.fit(X_scaled, y_scaled)

    # 4. Trust Region around the incumbent (best observed point).
    # Defined before the objective so the closure below never refers to a
    # name that has not been assigned yet.
    best_idx = int(np.argmax(y_train))
    x_start_original = X_train[best_idx]
    x_start_scaled = scaler_x.transform(x_start_original.reshape(1, -1)).flatten()

    # Box of +/- radius in original units, clamped to [0, 1], then mapped
    # into the standardised space the surrogates were trained in.
    lower_real = np.maximum(0.0, x_start_original - radius)
    upper_real = np.minimum(1.0, x_start_original + radius)
    lower_scaled = (lower_real - scaler_x.mean_) / scaler_x.scale_
    upper_scaled = (upper_real - scaler_x.mean_) / scaler_x.scale_
    bounds_scaled = list(zip(lower_scaled, upper_scaled))

    # 5. Objective Function (UCB + Repulsion Penalty)
    penalty_weight = 10.0
    length_scale = 0.1

    def objective_function(x):
        x_reshaped = x.reshape(1, -1)

        # NN Predictions & Uncertainty (spread across ensemble members)
        nn_preds_list = [m.predict(x_reshaped)[0] for m in nn_ensemble]
        avg_nn_pred = np.mean(nn_preds_list)
        std_nn_pred = np.std(nn_preds_list)

        # GP Predictions & Uncertainty
        gp_pred, gp_std = gp_model.predict(x_reshaped, return_std=True)
        gp_pred = gp_pred[0]
        gp_std = gp_std[0]

        # 5a. Combined Mean and Uncertainty (fixed 60/40 NN/GP blend)
        combined_mean = 0.6 * avg_nn_pred + 0.4 * gp_pred
        combined_std = 0.6 * std_nn_pred + 0.4 * gp_std

        # 5b. UCB Acquisition
        ucb = combined_mean + kappa * combined_std

        # 5c. Repulsion Penalty: a Gaussian bump centred on the incumbent so
        # the optimizer does not simply return the point we already queried.
        dist_sq = np.sum((x_reshaped - x_start_scaled.reshape(1, -1))**2)
        penalty = penalty_weight * np.exp(-dist_sq / (2 * length_scale**2))

        # minimize() minimises, so negate the acquisition value.
        return -ucb + penalty

    # 6. Perturbed Initialization: the penalty's gradient vanishes exactly at
    # the incumbent, so start slightly off-centre. Keep the start inside the
    # trust region so the optimizer never begins from an infeasible point.
    epsilon = np.random.uniform(-0.1, 0.1, size=x_start_scaled.shape)
    x_init = np.clip(x_start_scaled + epsilon, lower_scaled, upper_scaled)

    res = minimize(fun=objective_function, x0=x_init, method='L-BFGS-B',
                   bounds=bounds_scaled, options={'maxiter': 100})

    x_optimized_scaled = res.x.reshape(1, -1)
    next_point = scaler_x.inverse_transform(x_optimized_scaled).flatten()
    # Guard against round-off pushing the result marginally outside [0, 1].
    return np.clip(next_point, 0.0, 1.0)

In [16]:
# Maps function id -> suggested next query point.
submission_queries = {}
print(f"{'Func':<5} | {'Optimizing...'}")
print("-" * 30)

# Ensure you have updated data to 15 points before running this
for func_id in range(1, 9):
    X_known, y_known = load_data(func_id)
    next_x = suggest_next_point_tuned(func_id, X_known, y_known)
    submission_queries.update({func_id: next_x})

# Header for the copy-paste friendly summary.
banner = "=" * 30
print(f"\n{banner}")
print("FORMATTED SUBMISSION OUTPUT")
print(banner)

# One line per function: coordinates with six decimals, joined by hyphens.
for func_id, x_val in submission_queries.items():
    formatted_str = "-".join(f"{val:.6f}" for val in x_val)
    print(f"function_number: {func_id}: {formatted_str}")

Func  | Optimizing...
------------------------------
--- Optimizing Function 1 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (32, 32), 'alpha': 0.01, 'activation': 'tanh'}
--- Optimizing Function 2 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (32, 32), 'alpha': 0.01, 'activation': 'tanh'}
--- Optimizing Function 3 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (64,), 'alpha': 0.001, 'activation': 'tanh'}
--- Optimizing Function 4 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (64,), 'alpha': 0.001, 'activation': 'relu'}
--- Optimizing Function 5 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (64, 32), 'alpha': 0.01, 'activation': 'tanh'}
--- Optimizing Function 6 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (32, 32), 'alpha': 0.001, 'activation': 'relu'}
--- Optimizing Function 7 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (128, 64), 'alpha': 0.01, 'activation': 'relu'}
--- Optimizing Function 8 (Tuned) ---
   Best Params: {'hidden_layer_sizes': (128