## Optunaによるハイパーパラメータ最適化
- Optuna 要インストール
 - https://optuna.org

In [3]:
import numpy as np
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import scale
import optuna

### Breast Cancerデータのロード

In [4]:
df = load_breast_cancer()
X = scale(df.data)
y = df.target

### 最適化関数の定義

In [9]:
def objective(trial):
    param_kernel = trial.suggest_categorical('param_kernel', ['rbf', 'poly'])
    param_gamma = trial.suggest_float('param_gamma', 1e-3, 1e3)
    param_degree = trial.suggest_int('param_degree', 1, 4)
    param_coef0 =  trial.suggest_int('param_coef0', 0, 1e3)
    svm = SVC(kernel=param_kernel, gamma=param_gamma, degree=param_degree, coef0=param_coef0)

    # クロスバリデーションのテストデータに対するAccuracy
    score = cross_val_score(svm, X_train, y_train, cv=5)
    # デフォルトでは最小化になっているため，(1 - accuracy)を返すように設定
    return 1.0 - score.mean()

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# サンプラーインスタンスの生成
smp = optuna.samplers.TPESampler(seed=1)
# 最適化インスタンスの生成
study = optuna.create_study(sampler=smp)
# 最適化の実行，n_trialsは試行回数
study.optimize(objective, n_trials=30)

[I 2025-01-29 16:28:38,708] A new study created in memory with name: no-name-8defc946-58b8-4ec8-a71f-23d251a44879
[I 2025-01-29 16:28:38,731] Trial 0 finished with value: 0.035221518987341716 and parameters: {'param_kernel': 'poly', 'param_gamma': 0.1153747029700693, 'param_degree': 2, 'param_coef0': 146}. Best is trial 0 with value: 0.035221518987341716.
[I 2025-01-29 16:28:38,745] Trial 1 finished with value: 0.05778481012658221 and parameters: {'param_kernel': 'poly', 'param_gamma': 345.5613814823207, 'param_degree': 2, 'param_coef0': 539}. Best is trial 0 with value: 0.035221518987341716.
[I 2025-01-29 16:28:38,761] Trial 2 finished with value: 0.15835443037974684 and parameters: {'param_kernel': 'poly', 'param_gamma': 204.4530452792677, 'param_degree': 4, 'param_coef0': 27}. Best is trial 0 with value: 0.035221518987341716.
[I 2025-01-29 16:28:38,783] Trial 3 finished with value: 0.37436708860759504 and parameters: {'param_kernel': 'rbf', 'param_gamma': 558.6902697559232, 'param_d

### Bestパラメータで再度学習を行いAccuracyを算出

In [14]:
svm_best = SVC(kernel=study.best_params['param_kernel'], 
                            gamma=study.best_params['param_gamma'], 
                            degree=study.best_params['param_degree'],
                            coef0=study.best_params['param_coef0'])
svm_best.fit(X_train, y_train)
trn_acc = svm_best.score(X_train, y_train)
tst_acc = svm_best.score(X_test, y_test)
print("Train acc: %.4f, Test acc: %.4f" % (trn_acc, tst_acc))

Train acc: 1.0000, Test acc: 0.9474


### Best パラメータを表示

In [15]:
print(study.best_params)

{'param_kernel': 'poly', 'param_gamma': 313.42486473506466, 'param_degree': 3, 'param_coef0': 877}
