In [49]:
from functools import partial

import optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import datasets

In [10]:
def objective(X, y, trial, *, n_splits=10, random_state=42):
    """Objective function for Optuna to minimize: cross-validated SVC error.

    Samples an SVC configuration from ``trial``, scores it with shuffled
    stratified k-fold cross-validation, and returns one minus the mean
    test score so that a better model yields a smaller value.

    Args:
        X: Feature matrix, passed through to ``cross_validate``.
        y: Target labels, passed through to ``cross_validate``.
        trial: Optuna trial used to sample ``kernel``, ``C`` and ``gamma``.
        n_splits: Number of stratified folds (keyword-only, default 10).
        random_state: Seed for the shuffled fold assignment
            (keyword-only, default 42).

    Returns:
        ``1.0 - mean(test_score)`` over the folds.
    """
    # Search space: kernel type, plus C and gamma sampled on a log scale.
    # NOTE(review): newer Optuna releases deprecate suggest_loguniform in
    # favour of suggest_float(..., log=True) -- confirm against the installed
    # version before switching.
    params = {
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'sigmoid']),
        'C': trial.suggest_loguniform('C', 1e+0, 1e+2),
        'gamma': trial.suggest_loguniform('gamma', 1e-2, 1e+1),
    }

    # Build the model from the sampled hyperparameters.
    model = SVC(**params)

    # Evaluate with shuffled stratified k-fold CV (10 folds by default).
    # No `scoring` is passed, so the estimator's default scorer is used
    # (accuracy for SVC).
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    scores = cross_validate(model, X=X, y=y, cv=kf)

    # Optuna minimizes here, so turn the score into an error: 1.0 - score.
    return 1.0 - scores['test_score'].mean()

In [18]:
import os
import pandas as pd

In [46]:
# Read the Titanic training CSV from the path under ~/.kaggle into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="~/.kaggle/competitions/titanic/train.csv",
)

In [47]:
# Columns used as model inputs; every one except 'Age' is one-hot encoded.
feature_cols = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Cabin', 'Embarked']
categorical_cols = [col for col in feature_cols if col != 'Age']

# One-hot encode the categorical columns, then replace remaining NaNs with 0.
X = pd.get_dummies(data[feature_cols], columns=categorical_cols).fillna(0)

# Target column.
y = data['Survived']

In [53]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [55]:
# Baseline: an SVC with default hyperparameters, scored on the held-out split.
model = SVC().fit(X_train, y_train)
baseline_score = model.score(X_test, y_test)
print(baseline_score)

0.6983240223463687


In [56]:
# Bind the training data to the objective so Optuna only has to supply `trial`.
f = partial(objective, X_train, y_train)

# Create the optimization study. The sampler is seeded so that re-running the
# notebook reproduces the same sequence of trials and the same best parameters
# (TPE is Optuna's default sampler; create_study() minimizes by default).
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))

# Run 100 trials.
study.optimize(f, n_trials=100)

# Print the best hyperparameters found.
print('params:', study.best_params)

[I 2019-05-11 18:16:11,400] Finished trial#0 resulted in value: 0.20775489618762033. Current best value is 0.20775489618762033 with parameters: {'kernel': 'rbf', 'C': 1.4932678560000918, 'gamma': 0.029592310333118994}.
[I 2019-05-11 18:16:11,837] Finished trial#1 resulted in value: 0.43551638652028235. Current best value is 0.20775489618762033 with parameters: {'kernel': 'rbf', 'C': 1.4932678560000918, 'gamma': 0.029592310333118994}.
[I 2019-05-11 18:16:12,286] Finished trial#2 resulted in value: 0.4565449334403484. Current best value is 0.20775489618762033 with parameters: {'kernel': 'rbf', 'C': 1.4932678560000918, 'gamma': 0.029592310333118994}.
[I 2019-05-11 18:16:12,815] Finished trial#3 resulted in value: 0.393340681893274. Current best value is 0.20775489618762033 with parameters: {'kernel': 'rbf', 'C': 1.4932678560000918, 'gamma': 0.029592310333118994}.
[I 2019-05-11 18:16:13,312] Finished trial#4 resulted in value: 0.5127360602637798. Current best value is 0.20775489618762033 w

[I 2019-05-11 18:16:32,987] Finished trial#37 resulted in value: 0.4706198705899036. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:33,544] Finished trial#38 resulted in value: 0.22059007818672194. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:34,175] Finished trial#39 resulted in value: 0.2541466624475315. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:34,619] Finished trial#40 resulted in value: 0.4312612070778088. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:35,301] Finished trial#41 resulted in value: 0.2527970281641243. Current best value is 0.18533759717457643 with par

[I 2019-05-11 18:16:54,899] Finished trial#74 resulted in value: 0.22035215808065678. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:55,431] Finished trial#75 resulted in value: 0.4298626056792074. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:56,027] Finished trial#76 resulted in value: 0.20220002752217314. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:56,600] Finished trial#77 resulted in value: 0.19097153969794056. Current best value is 0.18533759717457643 with parameters: {'kernel': 'rbf', 'C': 7.0898706115848, 'gamma': 0.019733102115916366}.
[I 2019-05-11 18:16:57,200] Finished trial#78 resulted in value: 0.22203834523283128. Current best value is 0.18533759717457643 with 

params: {'kernel': 'rbf', 'C': 7.660525691175324, 'gamma': 0.013116664828190997}


In [58]:
# Refit an SVC with the best hyperparameters from the study and score it on
# the held-out split.
op_model = SVC(**study.best_params).fit(X_train, y_train)
tuned_score = op_model.score(X_test, y_test)
print(tuned_score)

0.8156424581005587
