# Week 5: Deep Learning Concepts & Neural Ensembles

## Strategy: Robust Gradient Ascent via Ensembling
With 14 data points, a single Neural Network can be unstable (sensitive to weight initialization).
Inspired by Deep Learning practices (Module 16), we implement:
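1. **Neural Ensemble (Seed Ensembling)**: Train 3 distinct `MLPRegressor` models with different random seeds. Unlike true bagging, there is no bootstrap resampling: every model sees the same 14 points, and only the weight initialization differs.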
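2. **Averaged Gradients**: The optimizer climbs a weighted blend of the *averaged* prediction of these 3 networks and a Gaussian Process (GP) surrogate; L-BFGS-B estimates the gradient of that blended surface by finite differences.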
3. **Architectural Trade-offs**: We use a standardized architecture but rely on the ensemble to reduce variance.

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from scipy.optimize import minimize
import sys
import os

# Make the Module 16 folder importable so that `utils` can be found.
# The path is relative, so it is resolved against the current working
# directory at the time this cell runs.
# NOTE(review): presumably utils.py (which provides load_data) lives in that
# folder - confirm. This append must run before the import below.
sys.path.append(os.path.abspath('../../../16 Neural Networks and Deep Learning - Part Two - Advanced Concepts'))
from utils import load_data

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so any warnings raised while
# fitting or optimizing below will also be hidden - confirm that is intended.
warnings.filterwarnings("ignore")
# Seed NumPy's global random number generator (fixed value used for Week 5).
np.random.seed(45) # Week 5 Seed

# Simple confirmation that the setup cell ran to completion.
print("Ready for Ensemble Optimization")

Ready for Ensemble Optimization


In [7]:
def suggest_next_point_ensemble(func_id, X_train, y_train, *,
                                seeds=(42, 101, 999),
                                nn_weight=None,
                                bounds=(0.0, 1.0)):
    """Suggest the next query point by maximizing a blended surrogate model.

    The surrogate is a weighted sum of (a) the mean prediction of several
    ``MLPRegressor`` models that differ only in ``random_state`` and (b) a
    Gaussian Process prediction. Inputs and targets are standardized before
    fitting. The surrogate is then maximized with L-BFGS-B, started from the
    best observed training point and constrained to the search box.

    Args:
        func_id: Identifier of the function being optimized. It is printed in
            the progress line and, when ``nn_weight`` is None, selects the
            default blend weights.
        X_train: Observed inputs, array-like of shape (n_samples, n_dims).
        y_train: Observed outputs, array-like with n_samples values.
        seeds: Random seeds, one per network in the ensemble.
        nn_weight: Weight given to the averaged network prediction; the GP
            receives ``1 - nn_weight``. If None, the weights are 0.4 / 0.6
            for ``func_id == 8`` and 0.7 / 0.3 for every other function.
        bounds: ``(lower, upper)`` limits, in original (unscaled) units,
            applied to every input dimension.

    Returns:
        A 1-D NumPy array of length n_dims in original units, clipped to
        ``bounds``.

    Raises:
        ValueError: If the training data is empty or inconsistently shaped,
            ``seeds`` is empty, ``nn_weight`` lies outside [0, 1], or
            ``bounds`` is not a strictly increasing pair.
    """
    print(f"--- Optimizing Function {func_id} (Ensemble) ---")

    # 0. Normalize and validate inputs up front so failures are explicit.
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float).ravel()
    if X_train.ndim != 2 or X_train.shape[0] == 0:
        raise ValueError("X_train must be a non-empty 2-D array (n_samples, n_dims)")
    if y_train.shape[0] != X_train.shape[0]:
        raise ValueError("X_train and y_train must contain the same number of samples")

    seeds = tuple(seeds)
    if not seeds:
        raise ValueError("seeds must contain at least one random seed")

    lower, upper = bounds
    if not lower < upper:
        raise ValueError("bounds must be a (lower, upper) pair with lower < upper")

    # Blend weights are fixed for the whole optimization, so resolve them once
    # here rather than on every objective evaluation. The default pairs are
    # spelled out literally because 1 - 0.7 is not exactly 0.3 in floating
    # point, and the defaults must stay numerically unchanged.
    if nn_weight is None:
        nn_w, gp_w = (0.4, 0.6) if func_id == 8 else (0.7, 0.3)
    else:
        if not 0.0 <= nn_weight <= 1.0:
            raise ValueError("nn_weight must lie in [0, 1]")
        nn_w, gp_w = nn_weight, 1.0 - nn_weight

    # 1. Preprocessing: standardize inputs and targets.
    scaler_x = StandardScaler()
    X_scaled = scaler_x.fit_transform(X_train)

    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()

    # 2. Train the ensemble: identical architecture, one network per seed.
    nn_ensemble = []
    for seed in seeds:
        model = MLPRegressor(hidden_layer_sizes=(64, 32),
                             activation='tanh',
                             solver='lbfgs',
                             max_iter=2000,
                             random_state=seed)
        model.fit(X_scaled, y_scaled)
        nn_ensemble.append(model)

    # 3. Train the Gaussian Process on the same standardized data.
    # normalize_y stays False because the targets are already standardized.
    kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel(noise_level=0.1)
    gp_model = GaussianProcessRegressor(kernel=kernel, normalize_y=False)
    gp_model.fit(X_scaled, y_scaled)

    # 4. Objective: negated blend, because `minimize` minimizes and we want
    # the maximum of the surrogate.
    def objective_function(x):
        x_row = x.reshape(1, -1)
        avg_nn_pred = np.mean([model.predict(x_row)[0] for model in nn_ensemble])
        gp_pred = gp_model.predict(x_row)[0]
        return -(nn_w * avg_nn_pred + gp_w * gp_pred)

    # 5. Optimize in scaled space, starting from the best observed point.
    best_idx = np.argmax(y_train)
    x_start_scaled = scaler_x.transform(X_train[best_idx].reshape(1, -1)).flatten()

    # Map the search box into scaled space with the scaler's own statistics.
    lower_scaled = (lower - scaler_x.mean_) / scaler_x.scale_
    upper_scaled = (upper - scaler_x.mean_) / scaler_x.scale_
    bounds_scaled = list(zip(lower_scaled, upper_scaled))

    # No analytic gradient is supplied, so the solver estimates it by finite
    # differences with step size `eps`.
    res = minimize(fun=objective_function,
                   x0=x_start_scaled,
                   method='L-BFGS-B',
                   bounds=bounds_scaled,
                   options={'maxiter': 100, 'eps': 1e-5})

    # 6. Back to original units; clip to remove any rounding overshoot.
    next_point = scaler_x.inverse_transform(res.x.reshape(1, -1)).flatten()
    return np.clip(next_point, lower, upper)

In [8]:
# Run the ensemble optimizer once per function (ids 1 through 8) and store
# each suggested query point under its function id.
submission_queries = {}
print(f"{'Func':<5} | {'Optimizing...'}")
print("-" * 30)

for func_id in range(1, 9):
    X_known, y_known = load_data(func_id)
    next_x = suggest_next_point_ensemble(func_id, X_known, y_known)
    submission_queries.update({func_id: next_x})

# Banner that separates the progress log from the submission lines.
divider = "=" * 30
print("\n" + divider)
print("FORMATTED SUBMISSION OUTPUT")
print(divider)

# One line per function: coordinates at six decimals, joined by hyphens.
for func_id, x_val in submission_queries.items():
    formatted_str = "-".join(f"{val:.6f}" for val in x_val)
    print(f"function_number: {func_id}: {formatted_str}")

Func  | Optimizing...
------------------------------
--- Optimizing Function 1 (Ensemble) ---
--- Optimizing Function 2 (Ensemble) ---
--- Optimizing Function 3 (Ensemble) ---
--- Optimizing Function 4 (Ensemble) ---
--- Optimizing Function 5 (Ensemble) ---
--- Optimizing Function 6 (Ensemble) ---
--- Optimizing Function 7 (Ensemble) ---
--- Optimizing Function 8 (Ensemble) ---

FORMATTED SUBMISSION OUTPUT
function_number: 1: 1.000000-1.000000
function_number: 2: 0.778113-1.000000
function_number: 3: 0.269918-0.229874-0.510852
function_number: 4: 0.431298-0.422796-0.451605-0.465605
function_number: 5: 1.000000-1.000000-1.000000-1.000000
function_number: 6: 0.277153-0.278393-0.561083-0.591506-0.044624
function_number: 7: 0.000000-0.328132-0.372126-0.273588-0.393398-0.558576
function_number: 8: 0.000000-0.359302-0.000000-0.398913-0.657459-0.598171-0.285462-0.609723
