# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from sklearn.svm import SVR, SVC
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from scipy.stats import norm
import warnings

# Silence every warning for the rest of the session so the cell output stays compact.
warnings.filterwarnings(action="ignore")

# Project-local data loader (assumes utils.py is importable from the working directory).
from utils import load_data

# Seed NumPy's global random state once, up front (new seed for the new week).
SEED = 43
np.random.seed(SEED)
print("Ready for Week 3: SVM Integration")

Ready for Week 3: SVM Integration


# SVM-Assisted Logic

In [2]:
def suggest_next_point_hybrid(func_id, X_train, y_train, n_candidates=100_000):
    """Suggest the next query point for one black-box function.

    A Gaussian process (Matern 5/2 + white noise) is fitted on the standardized
    targets and scored with an upper-confidence-bound (UCB) acquisition over
    random candidates drawn uniformly from the unit hypercube. Depending on
    ``func_id`` an SVM assists the GP:

    * ``func_id`` in {6, 7, 8}: an RBF ``SVC`` trained on "above median" vs.
      "not above median" labels prunes the candidates before GP scoring.
    * ``func_id`` in {2, 3, 4, 5}: an RBF ``SVR`` prediction (weight 0.5) is
      added to the UCB score.
    * any other ``func_id``: pure GP-UCB.

    Additional per-function tweaks: for ``func_id == 8`` the first 30% of the
    candidates get columns 0 and 2 redrawn from [0, 0.15]; for ``func_id`` in
    {1, 7} the UCB exploration factor is 5.0 instead of 1.96.

    Randomness comes from NumPy's global random state (``np.random``), so call
    ``np.random.seed`` beforehand for reproducible suggestions.

    Parameters
    ----------
    func_id : int
        Identifier of the function being optimized; selects the strategy.
    X_train : array-like of shape (n_samples, n_dim)
        Points evaluated so far. Candidates are sampled in [0, 1]^n_dim, so the
        search space is assumed to be the unit hypercube.
    y_train : array-like of shape (n_samples,) or (n_samples, 1)
        Observed values at ``X_train`` (higher is better).
    n_candidates : int, default=100_000
        Number of random candidate points to score.

    Returns
    -------
    numpy.ndarray of shape (n_dim,)
        The candidate with the highest final acquisition score.

    Raises
    ------
    ValueError
        If ``X_train`` is not a non-empty 2-D array, if ``y_train`` does not
        hold exactly one value per row of ``X_train``, or if ``n_candidates``
        is smaller than 1.
    """
    print(f"--- Optimizing Function {func_id} ---")

    # 0. Input validation
    # Fail early with a clear message instead of deep inside scikit-learn, and
    # accept lists / column vectors in addition to flat ndarrays.
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float).ravel()
    if X_train.ndim != 2 or X_train.shape[0] == 0 or X_train.shape[1] == 0:
        raise ValueError(
            f"X_train must be a non-empty 2-D array, got shape {X_train.shape}"
        )
    if y_train.shape[0] != X_train.shape[0]:
        raise ValueError(
            f"y_train has {y_train.shape[0]} values but X_train has "
            f"{X_train.shape[0]} rows"
        )
    n_candidates = int(n_candidates)
    if n_candidates < 1:
        raise ValueError(f"n_candidates must be >= 1, got {n_candidates}")

    # 1. Preprocessing
    # Only the targets are standardized; the inputs are used as given (the
    # candidates below are drawn from [0, 1], matching an already-scaled X).
    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()

    # 2. Define Models

    # A. Gaussian Process (The Explorer)
    kernel_gp = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel(noise_level=0.1)
    gpr = GaussianProcessRegressor(kernel=kernel_gp, n_restarts_optimizer=10, normalize_y=False)
    gpr.fit(X_train, y_scaled)

    # B. Support Vector Machine (The Teacher)
    # Strategy is chosen per function id (proxy for dimension/complexity).
    use_svc_filter = False
    use_svr_validation = False

    if func_id in [6, 7, 8]:
        use_svc_filter = True  # High dimensions: prune space first
        print("   Strategy: SVC Classification Filter + GP")
    elif func_id in [2, 3, 4, 5]:
        use_svr_validation = True  # Mid dimensions: SVR consensus
        print("   Strategy: SVR Consensus + GP")
    else:
        print("   Strategy: Pure GP (Exploration focus)")

    # 3. Candidate Generation
    n_dim = X_train.shape[1]
    X_candidates = np.random.uniform(0, 1, (n_candidates, n_dim))

    # Biased sampling for Func 8 (learning from Week 2): the first 30% of the
    # candidates are forced to low values in columns 0 and 2.
    if func_id == 8:
        n_bias = int(n_candidates * 0.3)
        X_candidates[:n_bias, 0] = np.random.uniform(0, 0.15, n_bias)
        X_candidates[:n_bias, 2] = np.random.uniform(0, 0.15, n_bias)

    # 4. Strategy Execution

    # --- STRATEGY A: SVC Filtering (High Dim) ---
    if use_svc_filter:
        # Classes: 1 = strictly above the median, 0 = otherwise.
        threshold = np.percentile(y_train, 50)
        y_class = (y_train > threshold).astype(int)

        if np.unique(y_class).size < 2:
            # SVC.fit raises on a single class (e.g. all targets tied at the
            # median), so skip the filter rather than crash.
            print("   SVC Filter: Only one class present, falling back to full set.")
        else:
            # RBF kernel to capture non-linear "good" regions; probability=True
            # enables predict_proba.
            svc = SVC(kernel='rbf', C=10, probability=True, gamma='scale')
            svc.fit(X_train, y_class)

            # Probability of each candidate belonging to the "good" class.
            probs = svc.predict_proba(X_candidates)[:, 1]

            # Soft threshold (> 40%) keeps some room for exploration.
            mask = probs > 0.4
            if np.sum(mask) > 100:  # Ensure we don't filter everything
                X_candidates = X_candidates[mask]
                print(f"   SVC Filter: Reduced candidates from {n_candidates} to {len(X_candidates)}")
            else:
                print("   SVC Filter: Too restrictive, falling back to full set.")

    # 5. Evaluate with GP (UCB)
    mu, std = gpr.predict(X_candidates, return_std=True)

    # Exploration factor: 1.96 by default, 5.0 where we still explore heavily.
    kappa = 1.96
    if func_id in [1, 7]:
        kappa = 5.0

    ucb_scores = mu + kappa * std

    # --- STRATEGY B: SVR Consensus (Mid Dim) ---
    if use_svr_validation:
        # SVR is trained on the same standardized targets as the GP, so both
        # scores live on the same scale.
        svr = SVR(kernel='rbf', C=10, epsilon=0.1)
        svr.fit(X_train, y_scaled)
        svr_pred = svr.predict(X_candidates)

        # Favor points the GP finds promising/uncertain AND the SVR rates well.
        final_scores = ucb_scores + 0.5 * svr_pred
    else:
        final_scores = ucb_scores

    # 6. Select Best
    best_idx = np.argmax(final_scores)
    next_point = X_candidates[best_idx]

    return next_point

# Main

In [3]:
# Header for the per-function log that suggest_next_point_hybrid prints below.
print(f"{'Func':<5} | {'Strategy Used'}")
print("-" * 40)

# Next query point per function id, filled in the order the functions are run.
submission_queries = {}
for func_id in range(1, 9):
    # Observations gathered so far for this function.
    X_known, y_known = load_data(func_id)

    # Ask the hybrid optimizer for the next point and record it.
    next_x = suggest_next_point_hybrid(func_id, X_known, y_known)
    submission_queries[func_id] = next_x

# Banner around the copy-paste-ready submission lines.
rule = "=" * 30
print("\n" + rule)
print("FORMATTED SUBMISSION OUTPUT")
print(rule)

# One line per function: coordinates with six decimals, joined by " - ".
for func_id, x_val in submission_queries.items():
    formatted_str = " - ".join(f"{val:.6f}" for val in x_val)
    print(f"function_number: {func_id}: {formatted_str}")

Func  | Strategy Used
----------------------------------------
--- Optimizing Function 1 ---
   Strategy: Pure GP (Exploration focus)
--- Optimizing Function 2 ---
   Strategy: SVR Consensus + GP
--- Optimizing Function 3 ---
   Strategy: SVR Consensus + GP
--- Optimizing Function 4 ---
   Strategy: SVR Consensus + GP
--- Optimizing Function 5 ---
   Strategy: SVR Consensus + GP
--- Optimizing Function 6 ---
   Strategy: SVC Classification Filter + GP
   SVC Filter: Reduced candidates from 100000 to 45049
--- Optimizing Function 7 ---
   Strategy: SVC Classification Filter + GP
   SVC Filter: Reduced candidates from 100000 to 50179
--- Optimizing Function 8 ---
   Strategy: SVC Classification Filter + GP
   SVC Filter: Reduced candidates from 100000 to 70873

FORMATTED SUBMISSION OUTPUT
function_number: 1: 0.808075 - 0.798900
function_number: 2: 0.700728 - 0.956521
function_number: 3: 0.506405 - 0.895783 - 0.000570
function_number: 4: 0.429026 - 0.411545 - 0.422947 - 0.401934
function_