In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

from time import time
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the system clock and can jump if it is adjusted.
from time import perf_counter

# Synthetic regression problem: 10,000 samples, 50 features (30 informative),
# generated with a fixed seed and without shuffling.
X, y = make_regression(n_samples=10000, n_features=50, n_informative=30,
                       random_state=0, shuffle=False)

# Baseline forest: 200 trees, depth capped at 20, n_jobs left at its default.
regr = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=0)

# Time only the fit call.
start = perf_counter()
regr.fit(X, y)
elapsed = perf_counter() - start

print("Training took", elapsed, "seconds")

Training took 46.436320066452026 seconds


In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

from time import time
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the system clock and can jump if it is adjusted.
from time import perf_counter

# Synthetic regression problem: 10,000 samples, 50 features (30 informative),
# generated with a fixed seed and without shuffling.
X, y = make_regression(n_samples=10000, n_features=50, n_informative=30,
                       random_state=0, shuffle=False)

# Same forest configuration (200 trees, depth capped at 20), with n_jobs=4.
regr = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=0,
                             n_jobs=4)

# Time only the fit call.
start = perf_counter()
regr.fit(X, y)
elapsed = perf_counter() - start

print("Training took", elapsed, "seconds")

Training took 14.269346952438354 seconds


In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

from time import time
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the system clock and can jump if it is adjusted.
from time import perf_counter

# Synthetic regression problem: 10,000 samples, 50 features (30 informative),
# generated with a fixed seed and without shuffling.
X, y = make_regression(n_samples=10000, n_features=50, n_informative=30,
                       random_state=0, shuffle=False)

# Same forest configuration (200 trees, depth capped at 20), with n_jobs=8.
regr = RandomForestRegressor(n_estimators=200, max_depth=20, random_state=0,
                             n_jobs=8)

# Time only the fit call.
start = perf_counter()
regr.fit(X, y)
elapsed = perf_counter() - start

print("Training took", elapsed, "seconds")

Training took 9.367990016937256 seconds


In [4]:
import numpy as np

from time import time
import scipy.stats as stats
from sklearn.utils.fixes import loguniform

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier

# Number of parallel jobs; passed as n_jobs to the hyperparameter searches below.
N = 4

# get some data: load_digits called with n_class=3, returned as an (X, y) pair
X, y = load_digits(return_X_y=True, n_class=3)

# build a classifier
# random_state is pinned (0, the same seed the random-forest cells use) so that
# repeated runs of the searches score each candidate identically.
clf = SGDClassifier(
    loss="hinge",
    penalty="elasticnet",
    fit_intercept=True,
    random_state=0,
)

# Utility function to report best scores
def report(results, n_top=3):
    """Print a summary of the best-ranked candidates of a parameter search.

    For each rank from 1 to ``n_top`` (inclusive), every candidate whose
    ``rank_test_score`` equals that rank is printed as three lines: the rank,
    the mean/std validation score (3 decimals) and the parameter setting.
    Ranks that no candidate holds produce no output.

    Parameters
    ----------
    results : mapping
        Must provide ``"rank_test_score"`` (compared elementwise against an
        int, e.g. a NumPy array), and, indexable by candidate position,
        ``"mean_test_score"``, ``"std_test_score"`` and ``"params"``.
    n_top : int, default 3
        Highest rank to report.
    """
    rank = 1
    while rank <= n_top:
        # Positions of all candidates tied at this rank.
        tied_positions = np.flatnonzero(results["rank_test_score"] == rank)
        for pos in tied_positions:
            print("Model with rank {0}".format(rank))
            mean_score = results["mean_test_score"][pos]
            std_score = results["std_test_score"][pos]
            print(
                "Mean validation score: {0:.3f} (std: {1:.3f})".format(
                    mean_score, std_score
                )
            )
            setting = results["params"][pos]
            print("Parameters: {0}".format(setting))
        rank += 1

# perf_counter is a monotonic clock meant for measuring elapsed time.
from time import perf_counter

# specify parameters and distributions to sample from
param_dist = {
    "average": [True, False],
    "l1_ratio": stats.uniform(0, 1),
    "alpha": loguniform(1e-2, 1e0),
}

# run randomised search
# random_state is pinned (0, the seed used elsewhere in this notebook) so the
# same n_iter_search candidates are drawn from param_dist on every run.
n_iter_search = 50
random_search = RandomizedSearchCV(
    clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
    n_jobs=N,
    random_state=0,
)

# Time only the fit call, then summarise the top-ranked candidates.
start = perf_counter()
random_search.fit(X, y)
elapsed = perf_counter() - start
print(
    "RandomizedSearchCV took %.2f seconds for %d candidates parameter settings."
    % (elapsed, n_iter_search)
)
report(random_search.cv_results_)

# perf_counter is a monotonic clock meant for measuring elapsed time.
from time import perf_counter

# specify the explicit grid of parameter values to evaluate
# (2 averaging options x 20 l1_ratio values x 10 alpha values)
param_grid = {
    "average": [True, False],
    "l1_ratio": np.linspace(0, 1, num=20),
    "alpha": np.power(10, np.arange(-5, 5, dtype=float)),
}

# run grid search
grid_search = GridSearchCV(
    clf, param_grid=param_grid, n_jobs=N
)

# Time only the fit call, then summarise the top-ranked candidates.
start = perf_counter()
grid_search.fit(X, y)
elapsed = perf_counter() - start
print(
    "GridSearchCV took %.2f seconds for %d candidates parameter settings."
    % (elapsed, len(grid_search.cv_results_["params"]))
)
report(grid_search.cv_results_)

RandomizedSearchCV took 1.15 seconds for 50 candidates parameter settings.
Model with rank 1
Mean validation score: 0.994 (std: 0.007)
Parameters: {'alpha': 0.17289054540598126, 'average': False, 'l1_ratio': 0.07257118292834652}
Model with rank 2
Mean validation score: 0.987 (std: 0.014)
Parameters: {'alpha': 0.17948948768468748, 'average': False, 'l1_ratio': 0.11471570431738864}
Model with rank 3
Mean validation score: 0.987 (std: 0.017)
Parameters: {'alpha': 0.3609948251175031, 'average': False, 'l1_ratio': 0.24495566502457322}
GridSearchCV took 2.13 seconds for 400 candidates parameter settings.
Model with rank 1
Mean validation score: 0.994 (std: 0.007)
Parameters: {'alpha': 0.1, 'average': False, 'l1_ratio': 0.21052631578947367}
Model with rank 2
Mean validation score: 0.993 (std: 0.007)
Parameters: {'alpha': 0.001, 'average': True, 'l1_ratio': 0.42105263157894735}
Model with rank 2
Mean validation score: 0.993 (std: 0.007)
Parameters: {'alpha': 0.1, 'average': False, 'l1_ratio': 