## Optunaによるハイパーパラメータ最適化
- Optuna のインストールが必要（`pip install optuna`）
  - 公式サイト: https://optuna.org

In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import scale
import optuna

### Breast Cancerデータのロード

In [2]:
# Load the breast-cancer dataset bunch, then expose the scaled feature
# matrix and the target vector under the names the later cells use.
df = load_breast_cancer()
X, y = scale(df.data), df.target

### 目的関数の定義

In [3]:
def objective(trial):
    """Optuna objective: cross-validated error of an SVC on the training split.

    Draws the kernel type, ``gamma``, ``degree`` and ``coef0`` from the trial,
    builds an ``SVC`` with them and evaluates it with 5-fold
    ``cross_val_score`` on the notebook-level ``X_train`` / ``y_train``
    (both must be defined before ``study.optimize`` is called).

    Args:
        trial: Trial object supplied by Optuna, used to sample hyperparameters.

    Returns:
        ``1.0 - mean cross-validation score``, so that a lower value is better.
    """
    param_kernel = trial.suggest_categorical('param_kernel', ['rbf', 'poly'])
    # suggest_float(..., log=True) is the replacement for the deprecated
    # suggest_loguniform; it samples gamma log-uniformly over the same range.
    param_gamma = trial.suggest_float('param_gamma', 1e-3, 1e3, log=True)
    # degree and coef0 are always sampled (even for 'rbf') so that
    # study.best_params contains all four keys regardless of the best kernel.
    param_degree = trial.suggest_int('param_degree', 1, 4)
    # Integer bounds for an integer parameter (was the float literal 1e3).
    param_coef0 = trial.suggest_int('param_coef0', 0, 1000)
    svm = SVC(kernel=param_kernel, gamma=param_gamma, degree=param_degree, coef0=param_coef0)

    # Accuracy on the held-out fold of each cross-validation split.
    score = cross_val_score(svm, X_train, y_train, cv=5)
    # The study minimizes by default, so return (1 - accuracy).
    return 1.0 - score.mean()

In [4]:
# Hold out 30% of the samples as a test set; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Create the sampler instance (TPE, seeded).
smp = optuna.samplers.TPESampler(seed=1)
# Create the study; "minimize" is the default direction, spelled out here
# because the objective returns (1 - accuracy).
study = optuna.create_study(sampler=smp, direction="minimize")
# Run the optimization; n_trials is the number of trials.
study.optimize(objective, n_trials=30)

[32m[I 2021-02-16 00:56:09,715][0m A new study created in memory with name: no-name-7ae19917-6f01-4ef2-a195-91fe4015245f[0m
[32m[I 2021-02-16 00:56:09,770][0m Trial 0 finished with value: 0.04531645569620246 and parameters: {'param_kernel': 'poly', 'param_gamma': 961.8533582409043, 'param_degree': 1, 'param_coef0': 767}. Best is trial 0 with value: 0.04531645569620246.[0m
[32m[I 2021-02-16 00:56:09,797][0m Trial 1 finished with value: 0.037753164556962204 and parameters: {'param_kernel': 'poly', 'param_gamma': 0.0651621545821569, 'param_degree': 4, 'param_coef0': 960}. Best is trial 1 with value: 0.037753164556962204.[0m
[32m[I 2021-02-16 00:56:09,848][0m Trial 2 finished with value: 0.06281645569620253 and parameters: {'param_kernel': 'rbf', 'param_gamma': 0.23959864837279687, 'param_degree': 4, 'param_coef0': 749}. Best is trial 1 with value: 0.037753164556962204.[0m
[32m[I 2021-02-16 00:56:09,899][0m Trial 3 finished with value: 0.06281645569620253 and parameters: {'pa

### Bestパラメータで再度学習を行いAccuracyを算出

In [9]:
# Read the best hyperparameters once, then rebuild the classifier from them.
best_params = study.best_params
svm_best = SVC(
    kernel=best_params['param_kernel'],
    gamma=best_params['param_gamma'],
    degree=best_params['param_degree'],
    coef0=best_params['param_coef0'],
)

# Refit on the full training split and report accuracy on both splits.
svm_best.fit(X_train, y_train)
trn_acc = svm_best.score(X_train, y_train)
tst_acc = svm_best.score(X_test, y_test)
print("Train acc: %.4f, Test acc: %.4f" % (trn_acc, tst_acc))

Train acc: 0.9925, Test acc: 0.9649


### Best パラメータを表示

In [10]:
# Print the hyperparameter dict of the study's best trial.
print(study.best_params)

{'param_kernel': 'rbf', 'param_gamma': 0.07579509499366509, 'param_degree': 1, 'param_coef0': 907}
