# Imports & Setup

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from sklearn.preprocessing import StandardScaler
from scipy.stats import norm
import warnings

# Silence every Python warning for the rest of the kernel session so the
# notebook output stays compact.
# NOTE(review): this filter has no category or module restriction, so it also
# hides any warnings raised while fitting the Gaussian Processes below —
# consider narrowing it when debugging a poor fit.
warnings.filterwarnings("ignore")

# Project-local helper; called later as load_data(func_id) and unpacked into
# (X, y). Assumed to live in utils.py next to this notebook — TODO confirm.
from utils import load_data

# Seed NumPy's legacy global RNG. The candidate sampling in this notebook uses
# np.random.uniform, which draws from this global generator.
np.random.seed(42)

# Simple marker that the setup cell ran to completion.
print("Libraries loaded. Ready for Bayesian Optimization.")

Libraries loaded. Ready for Bayesian Optimization.


# Strategy Configuration

In [4]:
def get_strategy(func_id):
    """
    Look up the acquisition settings to use for one function.

    Parameters
    ----------
    func_id : int
        Identifier of the function being optimised.

    Returns
    -------
    dict
        A freshly built dictionary with three keys:
        'kappa'           - exploration/exploitation weight,
        'description'     - human-readable label of the strategy,
        'biased_sampling' - flag requesting the special candidate sampling.
        Ids that belong to no group below receive the balanced defaults.
    """
    # Balanced fallback: kappa = 2.576 corresponds to a 99% confidence interval.
    defaults = {
        'kappa': 2.576,
        'description': 'Balanced',
        'biased_sampling': False,
    }

    # (member ids, overrides) pairs, checked in order; the first match wins.
    groups = (
        # Group 1 - exploitation (strong correlations); 1.96 = 95% CI, greedier.
        ((2, 5, 6), {
            'kappa': 1.96,
            'description': 'Exploitation (Strong Signal)',
        }),
        # Group 2 - exploration (noisy / weak correlations); very high
        # variance tolerance.
        ((1, 7), {
            'kappa': 5.0,
            'description': 'High Exploration (Unknown Regions)',
        }),
        # Group 3 - domain knowledge: exploit and switch on biased sampling.
        ((8,), {
            'kappa': 1.96,
            'description': 'Exploitation + Dim Reduction',
            'biased_sampling': True,
        }),
        # Function 3 - correlation analysis and model disagree (low vs. mid);
        # the earlier report decided to trust the model, i.e. explore more.
        ((3,), {
            'kappa': 3.0,
            'description': 'Model Trust (Resolving Linear/Non-linear conflict)',
        }),
    )

    for members, overrides in groups:
        if func_id in members:
            return {**defaults, **overrides}

    return defaults

# Optimizer Engine

In [6]:
def suggest_next_point(func_id, X_train, y_train, n_samples=50000):
    """
    Propose the next query point for one function via a GP surrogate and UCB.

    The observed targets are standardised, a Gaussian Process is fitted to
    them, a cloud of random candidates is scored with the Upper Confidence
    Bound ``mu + kappa * sigma`` and the best-scoring candidate is returned.

    Parameters
    ----------
    func_id : int
        Function identifier; selects the strategy through ``get_strategy``.
    X_train : array-like of shape (n_obs, n_dim)
        Inputs evaluated so far.
    y_train : array-like of shape (n_obs,)
        Observed outputs matching ``X_train``.
    n_samples : int, optional
        Number of random candidates to score (default 50000, the value that
        was previously hard-coded).

    Returns
    -------
    numpy.ndarray of shape (n_dim,)
        The candidate with the highest UCB score.

    Raises
    ------
    ValueError
        If ``n_samples`` is smaller than 1.

    Notes
    -----
    Candidates are drawn from NumPy's global RNG, so results depend on the
    seed set earlier in the notebook and on the order of calls.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    strategy = get_strategy(func_id)
    print(f"Optimizing Function {func_id}: {strategy['description']}")

    # Accept plain lists as well as arrays; arrays pass through unchanged.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    # 1. Preprocessing
    # Scale targets to mean=0, std=1 so one kernel prior suits every function
    # (the original author singles out Function 5 for its large magnitude and
    # Function 2 for its noise).
    scaler = StandardScaler()
    y_scaled = scaler.fit_transform(y_train.reshape(-1, 1)).flatten()

    # 2. Gaussian Process definition
    # Matern(nu=2.5) is less smooth than RBF; WhiteKernel models observation
    # noise (sigma_n^2).
    kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel(noise_level=0.1)

    gpr = GaussianProcessRegressor(kernel=kernel,
                                   n_restarts_optimizer=20,
                                   normalize_y=False)  # targets already scaled above

    gpr.fit(X_train, y_scaled)

    # 3. Candidate generation: uniform random points in [0, 1)^n_dim
    n_dim = X_train.shape[1]
    X_candidates = np.random.uniform(0, 1, (n_samples, n_dim))

    # Biased sampling (currently only enabled for Function 8)
    if strategy['biased_sampling']:
        print("   -> Applying Biased Sampling (Low X1, X3)")
        # Overwrite the first half of the candidates so that X1 and X3
        # (columns 0 and 2) fall in [0, 0.15), reflecting the strong negative
        # correlation reported by the earlier EDA.
        n_bias = int(n_samples * 0.5)
        X_candidates[:n_bias, 0] = np.random.uniform(0, 0.15, n_bias)
        X_candidates[:n_bias, 2] = np.random.uniform(0, 0.15, n_bias)

    # 4. Acquisition function (UCB): mu + kappa * sigma, in scaled target units
    mu, std = gpr.predict(X_candidates, return_std=True)
    ucb_scores = mu + strategy['kappa'] * std

    # 5. Return the candidate with the highest score
    return X_candidates[np.argmax(ucb_scores)]

# Execution and Formatting

In [11]:
# Next query point for each function, keyed by function id
submission_queries = {}

# Console header shown above the per-function log lines.
# NOTE(review): this cell prints no rows with Dim/Kappa values; only the
# "Optimizing Function ..." lines emitted by suggest_next_point follow.
print(f"{'Func':<5} | {'Dim':<3} | {'Kappa':<5} | {'Strategy'}")
print("-" * 60)

for func_id in range(1, 9):
    # Observations gathered so far for this function
    X_known, y_known = load_data(func_id)

    # Fit the surrogate and keep its suggested next query
    next_x = suggest_next_point(func_id, X_known, y_known)
    submission_queries[func_id] = next_x

    # Disabled sanity check: warn when the suggestion nearly duplicates an
    # already-evaluated point.
    # dists = np.linalg.norm(X_known - next_x, axis=1)
    # if np.min(dists) < 0.01:
    #     print(f"WARNING: Function {func_id} suggested point is very close to existing data.")

# Banner for the submission section: blank line, rule, title, rule
print("\n" + "="*30, "FORMATTED SUBMISSION OUTPUT", "="*30, sep="\n")

for func_id, x_val in submission_queries.items():
    # Coordinates with six decimals, joined by hyphens: 0.123456-0.654321
    formatted_str = "-".join(f"{val:.6f}" for val in x_val)
    print(f"function_number: {func_id}: {formatted_str}")

Func  | Dim | Kappa | Strategy
------------------------------------------------------------
Optimizing Function 1: High Exploration (Unknown Regions)
Optimizing Function 2: Exploitation (Strong Signal)
Optimizing Function 3: Model Trust (Resolving Linear/Non-linear conflict)
Optimizing Function 4: Balanced
Optimizing Function 5: Exploitation (Strong Signal)
Optimizing Function 6: Exploitation (Strong Signal)
Optimizing Function 7: High Exploration (Unknown Regions)
Optimizing Function 8: Exploitation + Dim Reduction
   -> Applying Biased Sampling (Low X1, X3)

FORMATTED SUBMISSION OUTPUT
function_number: 1: 0.884284-0.583209
function_number: 2: 0.681235-0.915951
function_number: 3: 0.456167-0.004151-0.502531
function_number: 4: 0.420631-0.372807-0.413533-0.403621
function_number: 5: 0.660788-0.927179-0.999546-0.975879
function_number: 6: 0.303218-0.102412-0.609419-0.996531-0.040102
function_number: 7: 0.015457-0.424697-0.654002-0.104147-0.365154-0.698135
function_number: 8: 0.009870-0.