# Comparing randomized search and grid search for hyperparameter estimation

Compare randomized search and grid search for optimizing hyperparameters of a linear SVM with SGD training. The hyperparameters that influence the learning (`average`, `l1_ratio` and `alpha`) are searched simultaneously (except for the number of training iterations, which poses a time / quality tradeoff).


In [96]:
from time import time

import numpy as np
import scipy.stats as stats
import pandas as pd

from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [97]:
# Load the digits dataset, restricted to its first three classes,
# directly as a (features, labels) pair.
X, y = load_digits(n_class=3, return_X_y=True)

In [100]:
# Preview the first five samples of the feature matrix
pd.DataFrame(X).head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,1.0,9.0,15.0,11.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,10.0,13.0,3.0,0.0,0.0
4,0.0,0.0,0.0,0.0,14.0,13.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,13.0,16.0,1.0,0.0


In [103]:
# Class balance of the labels. `y` is a 1-D label vector, so count it as a
# Series: this gives a plain label -> count table instead of the
# MultiIndex-keyed result that DataFrame.value_counts() produces.
pd.Series(y, name="digit").value_counts()

Unnamed: 0_level_0,count
0,Unnamed: 1_level_1
1,182
0,178
2,177


In [104]:
# Build the base classifier: a linear SVM (hinge loss) trained with SGD and an
# elastic-net penalty. SGD training is stochastic, so the seed is fixed to make
# the cross-validation scores reproducible across kernel restarts.
clf = SGDClassifier(
    loss="hinge",
    penalty="elasticnet",
    fit_intercept=True,
    random_state=0,
)

In [105]:
# Helper that prints the best-ranked candidates of a hyperparameter search

def report(results, n_top=3):
    """Print score and parameters for the top-ranked search candidates.

    Parameters
    ----------
    results : mapping
        Search results providing the entries ``"rank_test_score"``,
        ``"mean_test_score"``, ``"std_test_score"`` and ``"params"``,
        each indexable by candidate position.
    n_top : int, default=3
        Highest rank to report; ranks ``1`` through ``n_top`` are printed
        in increasing order. Every candidate holding a given rank is
        printed, so ties produce several entries for the same rank.

    Returns
    -------
    None
        The report is written to standard output.
    """
    for rank in range(1, n_top + 1):
        # Positions of all candidates that hold this rank (ties included)
        tied_positions = np.flatnonzero(results["rank_test_score"] == rank)
        for pos in tied_positions:
            print("Model with rank: {0}".format(rank))
            mean_score = results["mean_test_score"][pos]
            std_score = results["std_test_score"][pos]
            print(
                "Mean validation score: {0:.3f} (std: {1:.3f})".format(
                    mean_score, std_score
                )
            )
            print("Parameters: {0}".format(results["params"][pos]))
            # Blank separator line between entries
            print("")


In [106]:
# Search space for the randomized search: a discrete choice for `average`,
# a uniform distribution on [0, 1] for `l1_ratio`, and a log-uniform
# distribution on [1e-2, 1e0] for `alpha`.

param_dist = dict(
    average=[True, False],
    l1_ratio=stats.uniform(loc=0, scale=1),
    alpha=stats.loguniform(a=1e-2, b=1e0),
)

In [107]:
# Run the randomized search: draw `n_iter_search` candidate settings from
# `param_dist` and cross-validate each of them.
n_iter_search = 15
random_search = RandomizedSearchCV(
    clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
    # Fix the seed used to sample candidates so that a re-run draws the
    # same parameter settings.
    random_state=0,
)

# Time the full search (sampling + cross-validated fits)
start = time()
random_search.fit(X, y)
print(
    "RandomizedSearchCV took %.2f seconds for %d candidates parameter settings."
    % ((time() - start), n_iter_search)
)
report(random_search.cv_results_)

RandomizedSearchCV took 1.94 seconds for 15 candidates parameter settings.
Model with rank: 1
Mean validation score: 0.993 (std: 0.009)
Parameters: {'alpha': 0.0316581692318797, 'average': False, 'l1_ratio': 0.09387329008129175}

Model with rank: 2
Mean validation score: 0.987 (std: 0.014)
Parameters: {'alpha': 0.031294064907273156, 'average': False, 'l1_ratio': 0.15864644764249103}

Model with rank: 3
Mean validation score: 0.985 (std: 0.014)
Parameters: {'alpha': 0.015691639471778155, 'average': False, 'l1_ratio': 0.8069129770507795}



In [108]:
# Exhaustive grid over the same three hyperparameters:
# 2 values of `average` x 10 values of `l1_ratio` x 3 values of `alpha`.
param_grid = dict(
    average=[True, False],
    l1_ratio=np.linspace(0, 1, num=10),
    alpha=np.logspace(-2, 0, num=3),  # 1e-2, 1e-1, 1e0
)

# Cross-validate every combination in the grid and time the whole search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)

print(
    "GridSearchCV took %.2f seconds for %d candidate parameter settings."
    % (time() - start, len(grid_search.cv_results_["params"]))
)
report(grid_search.cv_results_)

GridSearchCV took 6.98 seconds for 60 candidate parameter settings.
Model with rank: 1
Mean validation score: 0.991 (std: 0.006)
Parameters: {'alpha': 0.01, 'average': True, 'l1_ratio': 1.0}

Model with rank: 2
Mean validation score: 0.991 (std: 0.010)
Parameters: {'alpha': 0.01, 'average': True, 'l1_ratio': 0.5555555555555556}

Model with rank: 3
Mean validation score: 0.989 (std: 0.018)
Parameters: {'alpha': 0.01, 'average': False, 'l1_ratio': 0.5555555555555556}



## Conclusion

The randomized search and the grid search explore exactly the same space of parameters. The resulting parameter settings are quite similar, while the run time for randomized search is drastically lower.

The performance may be slightly worse for the randomized search; any such difference is likely due to a noise effect and would not carry over to a held-out test set.