# Forked from pfnet-research/optuna-book
# list_2_16_optimize_rf_gb_with_conditional_search_space.py
import optuna
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Fetch the "adult" dataset from OpenML by name (no version is pinned here).
data = fetch_openml(name="adult")

# One-hot encode the feature table so the classifiers get numeric columns.
X = pd.get_dummies(data["data"])

# Binary target: 1 when the label is exactly ">50K", 0 for anything else.
y = [int(label == ">50K") for label in data["target"]]
def objective(trial):
    """Evaluate one trial over a search space conditioned on the classifier.

    The trial first chooses a classifier ("RF" or "GB"), then samples
    ``max_depth`` and ``min_samples_split`` under parameter names that are
    specific to the chosen classifier, so each classifier gets its own
    branch of the search space.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The mean of the 3-fold ``cross_val_score`` results for the sampled
        classifier on the module-level ``X`` and ``y``.
    """
    # scikit-learn accepts a float ``min_samples_split`` only in (0.0, 1.0].
    # A lower bound of exactly 0 could be sampled and would make the fit
    # fail, so keep the lower bound strictly positive.
    min_split_fraction = 1e-6

    clf_name = trial.suggest_categorical("clf", ("RF", "GB"))
    # Branch the hyperparameters according to the value of clf_name.
    if clf_name == "RF":
        clf = RandomForestClassifier(
            max_depth=trial.suggest_int(
                "rf_max_depth", 2, 32,
            ),
            min_samples_split=trial.suggest_float(
                "rf_min_samples_split", min_split_fraction, 1,
            ),
        )
    else:
        clf = GradientBoostingClassifier(
            max_depth=trial.suggest_int(
                "gb_max_depth", 2, 32,
            ),
            min_samples_split=trial.suggest_float(
                "gb_min_samples_split", min_split_fraction, 1,
            ),
        )
    score = cross_val_score(clf, X, y, cv=3)
    accuracy = score.mean()
    return accuracy
# Passing study_name and storage persists the study in the SQLite file.
# load_if_exists=True makes a re-run reuse the already stored study instead
# of failing because the study name is taken; new trials are appended to it.
study = optuna.create_study(
    direction="maximize",
    study_name="ch2-conditional",
    storage="sqlite:///optuna.db",
    load_if_exists=True,
)
study.optimize(objective, n_trials=100)
print(f"Best objective value: {study.best_value}")
print(f"Best parameter: {study.best_params}")