# 自動調參工具
## optuna

Optuna has modern functionalities as follows:

- Lightweight, versatile, and platform agnostic architecture
    - Handle a wide variety of tasks with a simple installation that has few requirements.

- Pythonic search spaces
    - Define search spaces using familiar Python syntax including conditionals and loops.

- Efficient optimization algorithms
    - Adopt state-of-the-art algorithms for sampling hyperparameters and efficiently pruning unpromising trials.

- Easy parallelization
    - Scale studies to tens or hundreds of workers with little or no changes to the code.

- Quick visualization
    - Inspect optimization histories from a variety of plotting functions.


https://github.com/optuna/optuna


In [8]:
import pandas as pd
import numpy as np

from sklearn import metrics
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

from functools import partial
from skopt import space, gp_minimize

from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, STATUS_FAIL, space_eval
from hyperopt.pyll.base import scope

import optuna


## 定義代理函數

In [9]:
# Function definition for model optimization
def optimize_model(trial, x, y):
    
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    n_estimators = trial.suggest_int('n_estimators', 100, 1500)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    max_features = trial.suggest_float('max_features', 0.01, 1.0)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        criterion=criterion,
    )

    
    kf = StratifiedKFold(n_splits=5)
    accuracies = []

    for idx in kf.split(X=x, y=y):
        train_idx, test_idx = idx[0], idx[1]
        x_train = x[train_idx]
        y_train = y[train_idx]

        x_test = x[test_idx]
        y_test = y[test_idx]

        model.fit(x_train, y_train)
        preds = model.predict(x_test)

        fold_acc = metrics.accuracy_score(y_test, preds)
        accuracies.append(fold_acc)

        return -1.0 * np.mean(accuracies)


In [10]:
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv("./mobile_price_data/train.csv")
    
    # Separate features (X) and target variable (y)
    X = df.drop("price_range", axis=1).values
    y = df["price_range"].values

    opt_model = partial(optimize_model, x=X, y=y)

    study = optuna .create_study(direction="minimize", study_name="Random Forest Optimization")
    study.optimize(opt_model, n_trials=10)

    # print(result)



[I 2024-05-24 11:49:32,186] A new study created in memory with name: Random Forest Optimization
[I 2024-05-24 11:49:32,560] Trial 0 finished with value: -0.8575 and parameters: {'criterion': 'entropy', 'n_estimators': 234, 'max_depth': 8, 'max_features': 0.15940697907744003}. Best is trial 0 with value: -0.8575.
[I 2024-05-24 11:49:36,313] Trial 1 finished with value: -0.89 and parameters: {'criterion': 'gini', 'n_estimators': 808, 'max_depth': 11, 'max_features': 0.9845331997460428}. Best is trial 1 with value: -0.89.
[I 2024-05-24 11:49:39,034] Trial 2 finished with value: -0.88 and parameters: {'criterion': 'gini', 'n_estimators': 1477, 'max_depth': 14, 'max_features': 0.24470090310618905}. Best is trial 1 with value: -0.89.
[I 2024-05-24 11:49:41,698] Trial 3 finished with value: -0.885 and parameters: {'criterion': 'gini', 'n_estimators': 1402, 'max_depth': 8, 'max_features': 0.34687600016429004}. Best is trial 1 with value: -0.89.
[I 2024-05-24 11:49:46,223] Trial 4 finished with