# ある特定の範囲のなかで最適なパラメータを発見したい

In [26]:
import numpy as np
from sklearn import  linear_model,datasets
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score




iris=datasets.load_iris()
features=iris.data
target=iris.target
f_train,f_test,t_train,t_test=train_test_split(features,target,random_state=1)

logit=linear_model.LogisticRegression()

penalty=['l1','l2']
C=np.logspace(0,4,10)

hypers=dict(C=C,penalty=penalty)

# verboseを1~3に設定すると、実行経過のログを出力する
gridsearch=GridSearchCV(logit,hypers,cv=5,verbose=0)

best_model=gridsearch.fit(f_train,t_train)
t_pred=best_model.predict(f_test)
accuracy_score(t_pred,t_test)

0.9210526315789473

In [31]:
print(f'best penalty is {best_model.best_estimator_.get_params()["penalty"]}')
print(f'best C is {best_model.best_estimator_.get_params()["C"]}')

best penalty is l2
best C is 2.7825594022071245


# 比較的安価な方法でパラメータサーチを行いたい

In [43]:
from scipy.stats import  uniform
from sklearn import  linear_model,datasets
from sklearn.model_selection import RandomizedSearchCV

iris=datasets.load_iris()
features=iris.data
target=iris.target

f_train,f_test,t_train,t_test=train_test_split(features,target,random_state=1)

logit=linear_model.LogisticRegression()

penalty=['l1','l2']

# パラメータ候補をランダムに生成するジェネレータ
C=uniform(loc=0,scale=4)

hypers=dict(C=C,penalty=penalty)

random_search=RandomizedSearchCV(
logit,
hypers,
random_state=1,
n_iter=100,
cv=5,
verbose=0)

best_model=random_search.fit(f_train,t_train)

In [44]:
t_pred=best_model.predict(f_test)
accuracy_score(t_pred,t_test)

0.9210526315789473

精度はGridSearchとほぼ変わらない

In [45]:
print(f'best penalty is {best_model.best_estimator_.get_params()["penalty"]}')
print(f'best C is {best_model.best_estimator_.get_params()["C"]}')

best penalty is l2
best C is 3.730229437354635
