<center><u><H1>Hyperparameter Optimization: GridSearchCV</H1></u></center>

In [1]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

## Data:

In [2]:
iris = load_iris()
X=iris.data
y=iris.target

In [3]:
sc = StandardScaler()
X_std = sc.fit_transform(X)

## Models:

In [4]:
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

In [5]:
svm = SVC()
knn = KNN()
lr = LogisticRegression()
rf = RandomForestClassifier()
xgb = xgb.XGBClassifier(objective="multi:softprob")

In [6]:
clf_svm = GridSearchCV(svm, {'kernel':['linear', 'poly','rbf'], 'C':[1, 10, 100], 'gamma':[0.1, 1.0, 10]}, cv=5)
clf_knn = GridSearchCV(knn, {'n_neighbors':[2, 5, 8, 10]}, cv=5)
clf_lr = GridSearchCV(lr, {'C':[1000, 100, 10, 1], 'solver':['lbfgs', 'newton-cg'], 'multi_class':['auto','multinomial']}, cv=5)
clf_rf = GridSearchCV(rf, {'n_estimators':[50,100,500,1000]}, cv=5)
clf_xgb = GridSearchCV(xgb, {'eta':[0.1,0.2,0.3],'max_depth':[3,4,5], 'n_estimators':[50,100,500,1000]}, cv=5)

In [7]:
clf_svm.fit(X_std, y)
clf_knn.fit(X_std, y)
clf_lr.fit(X_std, y)
clf_rf.fit(X_std, y)
clf_xgb.fit(X_std, y)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                     colsample_bylevel=1, colsample_bynode=1,
                                     colsample_bytree=1, gamma=0,
                                     learning_rate=0.1, max_delta_step=0,
                                     max_depth=3, min_child_weight=1,
                                     missing=None, n_estimators=100, n_jobs=1,
                                     nthread=None, objective='multi:softprob',
                                     random_state=0, reg_alpha=0, reg_lambda=1,
                                     scale_pos_weight=1, seed=None, silent=None,
                                     subsample=1, verbosity=1),
             iid='warn', n_jobs=None,
             param_grid={'eta': [0.1, 0.2, 0.3], 'max_depth': [3, 4, 5],
                         'n_estimators': [50, 100, 500, 1000]},
             pre_dispatch='2*n_

## Best Params:

In [8]:
print(f"Logistic Regression: {clf_lr.best_params_}")

Logistic Regression: {'C': 1000, 'multi_class': 'auto', 'solver': 'lbfgs'}


In [9]:
print(f"KNN: {clf_knn.best_params_}")

KNN: {'n_neighbors': 8}


In [10]:
print(f"Support Vector Machines: {clf_svm.best_params_}")

Support Vector Machines: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


In [11]:
print(f"Random Forest: {clf_rf.best_params_}")

Random Forest: {'n_estimators': 50}


In [12]:
print(f"XGBoost: {clf_xgb.best_params_}")

XGBoost: {'eta': 0.1, 'max_depth': 3, 'n_estimators': 500}


In [13]:
# Logistic Regression
means = clf_lr.cv_results_['mean_test_score']
stds = clf_lr.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_lr.cv_results_['params']):
    print("{0:0.3f} +/-{1:0.03f} for {2}".format(mean, std * 2, params))

0.973 +/-0.065 for {'C': 1000, 'multi_class': 'auto', 'solver': 'lbfgs'}
0.973 +/-0.065 for {'C': 1000, 'multi_class': 'auto', 'solver': 'newton-cg'}
0.973 +/-0.065 for {'C': 1000, 'multi_class': 'multinomial', 'solver': 'lbfgs'}
0.973 +/-0.065 for {'C': 1000, 'multi_class': 'multinomial', 'solver': 'newton-cg'}
0.973 +/-0.065 for {'C': 100, 'multi_class': 'auto', 'solver': 'lbfgs'}
0.973 +/-0.065 for {'C': 100, 'multi_class': 'auto', 'solver': 'newton-cg'}
0.973 +/-0.065 for {'C': 100, 'multi_class': 'multinomial', 'solver': 'lbfgs'}
0.973 +/-0.065 for {'C': 100, 'multi_class': 'multinomial', 'solver': 'newton-cg'}
0.973 +/-0.065 for {'C': 10, 'multi_class': 'auto', 'solver': 'lbfgs'}
0.973 +/-0.065 for {'C': 10, 'multi_class': 'auto', 'solver': 'newton-cg'}
0.973 +/-0.065 for {'C': 10, 'multi_class': 'multinomial', 'solver': 'lbfgs'}
0.973 +/-0.065 for {'C': 10, 'multi_class': 'multinomial', 'solver': 'newton-cg'}
0.960 +/-0.078 for {'C': 1, 'multi_class': 'auto', 'solver': 'lbfgs'}


In [14]:
# KNN
means = clf_knn.cv_results_['mean_test_score']
stds = clf_knn.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_knn.cv_results_['params']):
    print("{0:0.3f} +/-{1:0.03f} for {2}".format(mean, std * 2, params))

0.947 +/-0.080 for {'n_neighbors': 2}
0.960 +/-0.050 for {'n_neighbors': 5}
0.967 +/-0.042 for {'n_neighbors': 8}
0.960 +/-0.027 for {'n_neighbors': 10}


In [15]:
# Support Vector Machines
means = clf_svm.cv_results_['mean_test_score']
stds = clf_svm.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_svm.cv_results_['params']):
    print("{0:0.3f} +/-{1:0.03f} for {2}".format(mean, std * 2, params))

0.967 +/-0.060 for {'C': 1, 'gamma': 0.1, 'kernel': 'linear'}
0.827 +/-0.078 for {'C': 1, 'gamma': 0.1, 'kernel': 'poly'}
0.973 +/-0.050 for {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
0.967 +/-0.060 for {'C': 1, 'gamma': 1.0, 'kernel': 'linear'}
0.973 +/-0.050 for {'C': 1, 'gamma': 1.0, 'kernel': 'poly'}
0.947 +/-0.053 for {'C': 1, 'gamma': 1.0, 'kernel': 'rbf'}
0.967 +/-0.060 for {'C': 1, 'gamma': 10, 'kernel': 'linear'}
0.947 +/-0.080 for {'C': 1, 'gamma': 10, 'kernel': 'poly'}
0.900 +/-0.112 for {'C': 1, 'gamma': 10, 'kernel': 'rbf'}
0.960 +/-0.078 for {'C': 10, 'gamma': 0.1, 'kernel': 'linear'}
0.900 +/-0.060 for {'C': 10, 'gamma': 0.1, 'kernel': 'poly'}
0.973 +/-0.065 for {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
0.960 +/-0.078 for {'C': 10, 'gamma': 1.0, 'kernel': 'linear'}
0.960 +/-0.065 for {'C': 10, 'gamma': 1.0, 'kernel': 'poly'}
0.953 +/-0.033 for {'C': 10, 'gamma': 1.0, 'kernel': 'rbf'}
0.960 +/-0.078 for {'C': 10, 'gamma': 10, 'kernel': 'linear'}
0.947 +/-0.080 for {'C': 10, '

In [16]:
# Random Forest
means = clf_rf.cv_results_['mean_test_score']
stds = clf_rf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_rf.cv_results_['params']):
    print("{0:0.3f} +/-{1:0.03f} for {2}".format(mean, std * 2, params))

0.960 +/-0.050 for {'n_estimators': 50}
0.947 +/-0.080 for {'n_estimators': 100}
0.960 +/-0.050 for {'n_estimators': 500}
0.960 +/-0.050 for {'n_estimators': 1000}


In [17]:
# XGBoost
means = clf_xgb.cv_results_['mean_test_score']
stds = clf_xgb.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_xgb.cv_results_['params']):
    print("{0:0.3f} +/-{1:0.03f} for {2}".format(mean, std * 2, params))

0.953 +/-0.068 for {'eta': 0.1, 'max_depth': 3, 'n_estimators': 50}
0.953 +/-0.068 for {'eta': 0.1, 'max_depth': 3, 'n_estimators': 100}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 3, 'n_estimators': 500}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 3, 'n_estimators': 1000}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 4, 'n_estimators': 50}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 4, 'n_estimators': 100}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 4, 'n_estimators': 500}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 4, 'n_estimators': 1000}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 5, 'n_estimators': 50}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 5, 'n_estimators': 100}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 5, 'n_estimators': 500}
0.960 +/-0.050 for {'eta': 0.1, 'max_depth': 5, 'n_estimators': 1000}
0.953 +/-0.068 for {'eta': 0.2, 'max_depth': 3, 'n_estimators': 50}
0.953 +/-0.068 for {'eta': 0.2, 'max_depth': 3, 'n_estimators': 100}
0.960 +/-0.050 for {'eta': 0.2, 'ma

## Reference:
http://scikit-learn.org/stable/modules/grid_search.html