# In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
import xgboost as xgb

# -------------------------------
# 1. Objective Function
# -------------------------------
def objective(params):
    """Score one hyperparameter pair for the Bayesian optimizer.

    Args:
        params: Indexable pair ``(max_depth, learning_rate)``. The first
            entry is truncated to ``int`` before being handed to XGBoost;
            the second is passed through unchanged.

    Returns:
        The negated mean 3-fold cross-validation accuracy, so that a
        minimizer ends up maximizing accuracy.

    Note:
        The training data is read from the module-level globals ``X`` and
        ``y``; they must be defined before this function is called.
    """
    depth = int(params[0])
    eta = params[1]

    classifier = xgb.XGBClassifier(
        n_estimators=100,
        max_depth=depth,
        learning_rate=eta,
        eval_metric="logloss",
        use_label_encoder=False,
    )

    fold_accuracies = cross_val_score(
        classifier, X, y, cv=3, scoring="accuracy"
    )
    # The optimizer minimizes, hence the sign flip on the accuracy.
    return -np.mean(fold_accuracies)

# -------------------------------
# 2. Acquisition Function (Expected Improvement)
# -------------------------------
def expected_improvement(X_sample, Y_sample, model, X_candidates, xi=0.01):
    """Expected Improvement (EI) acquisition for a minimization problem.

    For each candidate the surrogate's posterior mean ``mu`` and standard
    deviation ``sigma`` are queried, and EI is computed as::

        imp = min(Y_sample) - mu - xi
        Z   = imp / sigma
        EI  = imp * Phi(Z) + sigma * phi(Z)

    where ``Phi``/``phi`` are the standard normal CDF/PDF.

    Args:
        X_sample: Points evaluated so far. Not used by the computation;
            kept so the call signature stays unchanged.
        Y_sample: Objective values observed so far (lower is better).
        model: Fitted surrogate exposing
            ``predict(X, return_std=True) -> (mu, sigma)``.
        X_candidates: Candidate points at which to evaluate EI.
        xi: Exploration margin subtracted from the improvement.

    Returns:
        Array of EI values, one per candidate. Candidates whose predicted
        standard deviation is not strictly positive get an EI of exactly 0.
    """
    mu, sigma = model.predict(X_candidates, return_std=True)
    mu = np.asarray(mu)
    sigma = np.asarray(sigma)

    best_observed = np.min(Y_sample)
    imp = best_observed - mu - xi

    # Only divide where sigma > 0. Dividing first and masking afterwards
    # produces inf/NaN intermediates and RuntimeWarnings (which become
    # exceptions when warnings are treated as errors).
    ei = np.zeros_like(imp, dtype=float)
    has_spread = sigma > 0.0
    imp_pos = imp[has_spread]
    sigma_pos = sigma[has_spread]
    Z = imp_pos / sigma_pos
    ei[has_spread] = imp_pos * norm.cdf(Z) + sigma_pos * norm.pdf(Z)
    return ei

# -------------------------------
# 3. Bayesian Optimization Loop
# -------------------------------
def bayesian_optimization(n_iters, sample_loss, bounds, n_pre_samples=5):
    """Minimize ``sample_loss`` over a 2-D (integer, continuous) search space.

    A Gaussian-process surrogate (Matern 2.5 kernel plus white noise) is
    refit on all observations at every iteration, and the next point is the
    grid candidate with the highest Expected Improvement.

    Args:
        n_iters: Number of surrogate-guided evaluations to run after the
            random initialization.
        sample_loss: Callable taking a length-2 array ``[int_param,
            float_param]`` and returning a scalar loss to minimize. Both
            entries arrive as floats because they live in one NumPy array.
        bounds: ``[(int_low, int_high), (float_low, float_high)]``. The
            integer bounds are treated as inclusive on both ends.
        n_pre_samples: Number of uniformly random points evaluated before
            the surrogate is first fitted.

    Returns:
        Tuple ``(X_sample, Y_sample)``: an array of all evaluated points,
        one row per evaluation, and the array of matching loss values, in
        evaluation order.
    """
    (int_low, int_high), (float_low, float_high) = bounds

    X_sample = []
    Y_sample = []

    # Random initial samples. ``randint`` excludes its upper limit, so add 1
    # to make the documented integer upper bound reachable.
    for _ in range(n_pre_samples):
        x = np.array([np.random.randint(int_low, int_high + 1),
                      np.random.uniform(float_low, float_high)])
        X_sample.append(x)
        Y_sample.append(sample_loss(x))

    X_sample = np.array(X_sample)
    Y_sample = np.array(Y_sample)

    # GP kernel: Matern + noise
    kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel()
    model = GaussianProcessRegressor(kernel=kernel, alpha=1e-6)

    # Candidate grid: every integer value (upper bound included) crossed with
    # 50 evenly spaced continuous values. It does not depend on the
    # observations, so it is built once instead of on every iteration.
    X_candidates = np.array([[d, lr]
                             for d in range(int_low, int_high + 1)
                             for lr in np.linspace(float_low, float_high, 50)])

    for i in range(n_iters):
        model.fit(X_sample, Y_sample)

        # Acquisition: pick the candidate with the largest expected improvement.
        ei = expected_improvement(X_sample, Y_sample, model, X_candidates)
        next_sample = X_candidates[np.argmax(ei)]

        # Evaluate the true objective at the proposed point.
        Y_next = sample_loss(next_sample)

        # Update samples
        X_sample = np.vstack((X_sample, next_sample))
        Y_sample = np.append(Y_sample, Y_next)

        # Losses are negated scores, so the best score is -min(loss).
        print(f"Iteration {i+1}: Best score {-np.min(Y_sample):.4f}")

    return X_sample, Y_sample

# -------------------------------
# 4. Run Experiment
# -------------------------------
from scipy.stats import norm

# Load the breast-cancer dataset; `objective` reads X and y as globals.
data = load_breast_cancer()
X = data.data
y = data.target

# Search space: max_depth in [2, 10], learning_rate in [0.01, 0.3].
bounds = [(2, 10), (0.01, 0.3)]

# Run 10 surrogate-guided iterations on top of the default random warm-up.
X_sample, Y_sample = bayesian_optimization(
    n_iters=10,
    sample_loss=objective,
    bounds=bounds,
)


# Cell output: ModuleNotFoundError: No module named 'xgboost'