# 自動調參工具


## hyperopt

Hyperopt is a versatile and general-purpose optimization library that can be used for a wide range of optimization problems, including but not limited to machine learning.

https://github.com/hyperopt/hyperopt


## hyperopt-sklearn
Hyperopt-sklearn is a specialized tool built on Hyperopt, designed specifically to simplify hyperparameter optimization for scikit-learn models. It offers ease of use and predefined configurations, making it ideal for users working within the scikit-learn framework.

https://github.com/hyperopt/hyperopt-sklearn





In [13]:
import pandas as pd
import numpy as np

from sklearn import metrics
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

from functools import partial
from skopt import space, gp_minimize

from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, STATUS_FAIL, space_eval
from hyperopt.pyll.base import scope




## 定義代理函數

In [14]:
# Function definition for model optimization
def optimize_model(params, x, y):
    
    model = RandomForestClassifier(**params)
    kf = StratifiedKFold(n_splits=5)
    accuracies = []

    for idx in kf.split(X=x, y=y):
        train_idx, test_idx = idx[0], idx[1]
        x_train = x[train_idx]
        y_train = y[train_idx]

        x_test = x[test_idx]
        y_test = y[test_idx]

        model.fit(x_train, y_train)
        preds = model.predict(x_test)

        fold_acc = metrics.accuracy_score(y_test, preds)
        accuracies.append(fold_acc)

        return -1.0 * np.mean(accuracies)


In [15]:
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv("./mobile_price_data/train.csv")
    
    # Separate features (X) and target variable (y)
    X = df.drop("price_range", axis=1).values
    y = df["price_range"].values

    param_space = {
        "max_depth": scope.int(hp.quniform("max_depth", 3, 15, 1)),  # The maximum depth of the trees
        "n_estimators": scope.int(hp.quniform("n_estimators", 100, 600, 1)),  # The number of trees in the forest
        "criterion": hp.choice("criterion", ["gini", "entropy"]),  # The function to measure the quality of a split
        "max_features": hp.uniform("max_features", 0.01, 1)  # The number of features to consider for the best split
    }

    # Create a partial function for optimization, passing fixed arguments
    optimize_function = partial(
        optimize_model,
        x=X,
        y=y
    )

    trials = Trials()

    # Run the optimization
    result = fmin(
        fn=optimize_function,
        space=param_space,
        algo=tpe.suggest,
        max_evals=15,
        trials=trials
    )

    print(result)



100%|██████████| 15/15 [00:12<00:00,  1.17trial/s, best loss: -0.9125]
{'criterion': 1, 'max_depth': 7.0, 'max_features': 0.7521270896486781, 'n_estimators': 439.0}
