## Bayesian optimization using Gaussian Processes

In [1]:
import pandas as pd
import numpy as np

from sklearn import metrics
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

from functools import partial
from skopt import space, gp_minimize


## 定義代理函數

In [2]:
# Function definition for model optimization
def optimize_model(params, param_names, x, y):
    
    '''
    # This function should contain the logic for training and evaluating the model.
    # The params argument will be a list of parameter values.
    # The param_names argument is a list of parameter names.
    # The x and y arguments are the features and target labels respectively.
    '''

    params = dict(zip(param_names, params))
    model = RandomForestClassifier(**params)
    kf = StratifiedKFold(n_splits=5)
    accuracies = []

    for idx in kf.split(X=x, y=y):
        train_idx, test_idx = idx[0], idx[1]
        x_train = x[train_idx]
        y_train = y[train_idx]

        x_test = x[test_idx]
        y_test = y[test_idx]

        model.fit(x_train, y_train)
        preds = model.predict(x_test)

        fold_acc = metrics.accuracy_score(y_test, preds)
        accuracies.append(fold_acc)

        return -1.0 * np.mean(accuracies)


## 根據參數空間進行高斯優化 (GP)

In [3]:

if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv("./mobile_price_data/train.csv")
    
    # Separate features (X) and target variable (y)
    X = df.drop("price_range", axis=1).values
    y = df["price_range"].values

    # Define the hyperparameter search space
    param_space = [
        space.Integer(3, 15, name="max_depth"),  # The maximum depth of the trees
        space.Integer(100, 600, name="n_estimators"),  # The number of trees in the forest
        space.Categorical(["gini", "entropy"], name="criterion"),  # The function to measure the quality of a split
        space.Real(0.01, 1, prior="uniform", name="max_features")  # The number of features to consider for the best split
    ]

    # List of parameter names
    param_names = ["max_depth", "n_estimators", "criterion", "max_features"]

    # Create a partial function for optimization, passing fixed arguments
    optimize_function = partial(
        optimize_model,
        param_names=param_names,
        x=X,
        y=y
    )

    # Perform Bayesian optimization to find the best hyperparameters
    result = gp_minimize(
        optimize_function,  # The objective function to minimize
        dimensions=param_space,  # The search space for hyperparameters
        n_calls=15,  # The number of calls to the objective function
        n_random_starts=10,  # The number of random initialization points
        verbose=10  # Verbose output during optimization
    )

    # Print the best found hyperparameters
    print("Best parameters found:", dict(zip(param_names, result.x)))
    # Print the best accuracy (negative because we minimize the negative accuracy)
    print("Best accuracy:", -result.fun)


Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.9439
Function value obtained: -0.8925
Current minimum: -0.8925
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.9619
Function value obtained: -0.8675
Current minimum: -0.8925
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1.5833
Function value obtained: -0.9025
Current minimum: -0.9025
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 1.2612
Function value obtained: -0.9125
Current minimum: -0.9125
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.7623
Function value obtained: -0.9100
Current minimum: -0.9125
Iteration No: 6 started. 