Based on the Towards Data Science article: https://towardsdatascience.com/5x-faster-scikit-learn-parameter-tuning-in-5-lines-of-code-be6bdd21833c

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from tune_sklearn import TuneGridSearchCV

Create a synthetic classification dataset and define the SGDClassifier hyperparameter grid to search

In [3]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier

# Fixed seed so the synthetic data and the train/test split are identical on
# every "Restart Kernel -> Run All"; without it each run uses a different
# dataset and the reported timings/accuracies are not reproducible.
SEED = 42

# Synthetic 10-class problem: 11,000 samples x 1,000 features, of which 50 are
# informative and none are redundant.
X, y = make_classification(n_samples=11000,
                           n_features=1000,
                           n_informative=50,
                           n_redundant=0,
                           n_classes=10,
                           class_sep=2.5,
                           random_state=SEED)

# An integer test_size is an absolute row count: 1,000 rows are held out for
# testing and the remaining 10,000 are used for training.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=1000,
                                                    random_state=SEED)

# Hyperparameter grid: 3 alpha values x 2 epsilon values = 6 candidates.
# NOTE(review): per the scikit-learn docs, `epsilon` is only used by the
# epsilon-based losses; with SGDClassifier's default loss it likely has no
# effect on the fit -- confirm whether it is meant to be tuned here.
parameters = {
    'alpha': [1e-4, 1e-1, 1],
    'epsilon': [0.01, 0.1]
}

In [4]:
import time

# Grid search over `parameters` using tune-sklearn with early stopping enabled.
# NOTE(review): max_iters=10 presumably caps the training iterations/epochs
# per candidate -- confirm against the tune-sklearn documentation.
tune_search = TuneGridSearchCV(SGDClassifier(),
                               parameters,
                               early_stopping=True,
                               max_iters=10)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# clock is adjusted during the fit.
start = time.perf_counter()
tune_search.fit(X_train, y_train)
end = time.perf_counter()
print("Tune Fit Time:", end - start)

# Accuracy on the held-out test set: fraction of predictions equal to the
# true label.
pred = tune_search.predict(X_test)
accuracy = float(np.mean(np.asarray(pred) == np.asarray(y_test)))
print("Tune Accuracy:", accuracy)



Tune Fit Time: 13.91748595237732
Tune Accuracy: 0.886


Comparing TuneGridSearchCV with scikit-learn's GridSearchCV

In [5]:
# Imported here as well so this cell does not depend on an earlier cell
# having already imported `time`.
import time

from sklearn.model_selection import GridSearchCV

# Baseline: scikit-learn's exhaustive grid search over the same `parameters`
# grid. n_jobs=-1 asks scikit-learn to use all available processors.
sklearn_search = GridSearchCV(SGDClassifier(),
                              parameters,
                              n_jobs=-1)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# clock is adjusted during the fit.
start = time.perf_counter()
sklearn_search.fit(X_train, y_train)
end = time.perf_counter()
print("Sklearn fit time:", end - start)

# Accuracy on the held-out test set: fraction of predictions equal to the
# true label.
pred = sklearn_search.predict(X_test)
accuracy = float(np.mean(np.asarray(pred) == np.asarray(y_test)))
print("Accuracy:", accuracy)

Sklearn fit time: 51.54976415634155
Accuracy: 0.881


Example of Bayesian optimization with TuneSearchCV

In [6]:
from tune_sklearn import TuneSearchCV

# Other imports
# NOTE(review): `scipy` is not referenced anywhere in this cell; kept so that
# anything else relying on the import keeps working.
import scipy
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier

# Fixed seed so the synthetic data and the train/test split are identical on
# every run; without it the printed best_params_ are not comparable between
# runs.
SEED = 42

# Set training and validation sets: a synthetic 10-class problem with
# 11,000 samples x 1,000 features (50 informative); 1,000 rows are held out.
X, y = make_classification(n_samples=11000, n_features=1000, n_informative=50,
                           n_redundant=0, n_classes=10, class_sep=2.5,
                           random_state=SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1000,
                                                    random_state=SEED)

# Example parameter distributions to tune for SGDClassifier.
# Note: when Bayesian optimization is desired, give each parameter as a
# (lower, upper) tuple instead of a list of discrete values.
param_dists = {
   'alpha': (1e-4, 1e-1),
   'epsilon': (1e-2, 1e-1)
}

# n_iter=2 requests two sampled configurations.
# NOTE(review): max_iters=10 presumably caps the training iterations/epochs
# per configuration -- confirm against the tune-sklearn documentation.
tune_search = TuneSearchCV(SGDClassifier(),
   param_distributions=param_dists,
   n_iter=2,
   early_stopping=True,
   max_iters=10,
   search_optimization="bayesian"
)

tune_search.fit(X_train, y_train)
print(tune_search.best_params_)



{'alpha': 0.05918516832005342, 'epsilon': 0.013292858836067808}


More examples at https://github.com/ray-project/tune-sklearn