
Add n_jobs as parameter for AutoML #403

Merged
merged 21 commits into master from 382_n_jobs on Feb 26, 2020
Changes from all commits
21 commits
594e627
Add n_jobs as parameter
jeremyliweishih Feb 20, 2020
078fe85
Add tests
jeremyliweishih Feb 20, 2020
0360939
Changelog
jeremyliweishih Feb 20, 2020
5bf11b0
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 20, 2020
c3f3fe6
Merge branch 'master' of https://github.com/FeatureLabs/evalml into 3…
jeremyliweishih Feb 20, 2020
b6f3ad5
Standardize defaults and make sure n_jobs passed in
jeremyliweishih Feb 24, 2020
574cbd1
Add tests ensuring n_jobs gets passed down
jeremyliweishih Feb 24, 2020
b083e59
Add validation for n_jobs
jeremyliweishih Feb 24, 2020
ce151f0
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 24, 2020
31b30ed
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 24, 2020
f8cc1a6
Merge branch 'master' of https://github.com/FeatureLabs/evalml into 3…
jeremyliweishih Feb 25, 2020
54a5cbe
Add 0 case for n_jobs validation and test
jeremyliweishih Feb 25, 2020
d456c76
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 25, 2020
c9882cd
Add none case and make logic prettier
jeremyliweishih Feb 25, 2020
eb618c6
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 25, 2020
5aaad35
Lint
jeremyliweishih Feb 25, 2020
2d45f6a
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 25, 2020
f182f6e
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 26, 2020
858ed6f
Update docstrings
jeremyliweishih Feb 26, 2020
5ede776
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 26, 2020
2e87415
lint
jeremyliweishih Feb 26, 2020
1 change: 1 addition & 0 deletions docs/source/changelog.rst
@@ -7,6 +7,7 @@ Changelog
* Added emacs buffers to .gitignore :pr:`350`
* Add CatBoost (gradient-boosted trees) classification and regression components and pipelines :pr:`247`
* Added Tuner abstract base class :pr:`351`
* Added n_jobs as parameter for AutoClassificationSearch and AutoRegressionSearch :pr:`403`
* Fixes
* Fixed ROC and confusion matrix plots not being calculated if user passed own additional_objectives :pr:`276`
* Changes
7 changes: 5 additions & 2 deletions evalml/automl/auto_base.py
@@ -24,7 +24,7 @@ class AutoBase:

def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
patience, tolerance, model_types, detect_label_leakage, start_iteration_callback,
add_result_callback, additional_objectives, random_state, verbose):
add_result_callback, additional_objectives, random_state, n_jobs, verbose):
if tuner is None:
tuner = SKOptTuner
self.objective = get_objective(objective)
@@ -72,9 +72,12 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
'search_order': []
}
self.trained_pipelines = {}

self.random_state = random_state
random.seed(self.random_state)
np.random.seed(seed=self.random_state)

self.n_jobs = n_jobs
self.possible_model_types = list(set([p.model_type for p in self.possible_pipelines]))

self.tuners = {}
@@ -227,7 +230,7 @@ def _do_iteration(self, X, y, pbar, raise_errors):
pipeline = pipeline_class(
objective=self.objective,
random_state=self.random_state,
n_jobs=-1,
n_jobs=self.n_jobs,
number_features=X.shape[1],
**dict(parameters)
)
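
For orientation, the sketch below illustrates the plumbing this diff introduces: the search object stores the user-supplied n_jobs and forwards it to every pipeline it builds, replacing the previously hard-coded n_jobs=-1. This is a simplified, hypothetical illustration, not the actual evalml classes; the class and method names are invented for the sketch.

# Hypothetical sketch of the change in auto_base.py (not the real evalml code)
class SketchAutoSearch:
    def __init__(self, n_jobs=-1):
        self.n_jobs = n_jobs  # store the user-supplied value once on the search object

    def _build_pipeline(self, pipeline_class, parameters):
        # each pipeline now receives the search-level n_jobs
        # instead of the previously hard-coded -1
        return pipeline_class(n_jobs=self.n_jobs, **parameters)
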
5 changes: 5 additions & 0 deletions evalml/automl/auto_classification_search.py
@@ -25,6 +25,7 @@ def __init__(self,
add_result_callback=None,
additional_objectives=None,
random_state=0,
n_jobs=-1,
verbose=True):
"""Automated classifier pipeline search

@@ -68,6 +69,9 @@ def __init__(self,

random_state (int): the random_state

n_jobs (int or None): Non-zero integer describing level of parallelism used for pipelines.
None and 1 are equivalent. If set to -1, all CPUs are used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.

verbose (boolean): If True, turn verbosity on. Defaults to True
"""

@@ -99,6 +103,7 @@ def __init__(self,
add_result_callback=add_result_callback,
additional_objectives=additional_objectives,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose
)

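
A minimal usage sketch of the new parameter, assuming the top-level import shown here matches the evalml version of this PR and using a scikit-learn toy dataset for illustration:

from sklearn.datasets import load_breast_cancer
from evalml import AutoClassificationSearch

X, y = load_breast_cancer(return_X_y=True)

# limit each pipeline in the search to two parallel workers
automl = AutoClassificationSearch(max_pipelines=5, n_jobs=2)
automl.search(X, y)
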
5 changes: 5 additions & 0 deletions evalml/automl/auto_regression_search.py
@@ -24,6 +24,7 @@ def __init__(self,
add_result_callback=None,
additional_objectives=None,
random_state=0,
n_jobs=-1,
verbose=True):
"""Automated regressors pipeline search

@@ -65,6 +66,9 @@ def __init__(self,

random_state (int): the random_state

n_jobs (int or None): Non-zero integer describing level of parallelism used for pipelines.
None and 1 are equivalent. If set to -1, all CPUs are used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.

verbose (boolean): If True, turn verbosity on. Defaults to True

"""
@@ -91,5 +95,6 @@ def __init__(self,
add_result_callback=add_result_callback,
additional_objectives=additional_objectives,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose
)
4 changes: 2 additions & 2 deletions evalml/pipelines/classification/catboost.py
@@ -25,7 +25,7 @@ class CatBoostClassificationPipeline(PipelineBase):

def __init__(self, objective, impute_strategy, n_estimators,
eta, max_depth, number_features, bootstrap_type=None,
n_jobs=1, random_state=0):
n_jobs=-1, random_state=0):
# note: impute_strategy must support both string and numeric data
imputer = SimpleImputer(impute_strategy=impute_strategy)
estimator = CatBoostClassifier(n_estimators=n_estimators,
@@ -35,5 +35,5 @@ def __init__(self, objective, impute_strategy, n_estimators,
random_state=random_state)
super().__init__(objective=objective,
component_list=[imputer, estimator],
n_jobs=1,
n_jobs=n_jobs,
random_state=random_state)
2 changes: 1 addition & 1 deletion evalml/pipelines/classification/logistic_regression.py
@@ -32,7 +32,7 @@ def __init__(self, objective, penalty, C, impute_strategy,
estimator = LogisticRegressionClassifier(random_state=random_state,
penalty=penalty,
C=C,
n_jobs=-1)
n_jobs=n_jobs)

super().__init__(objective=objective,
component_list=[enc, imputer, scaler, estimator],
4 changes: 4 additions & 0 deletions evalml/pipelines/pipeline_base.py
@@ -44,7 +44,11 @@ def __init__(self, objective, component_list, n_jobs, random_state):

self.name = self._generate_name() # autogenerated
self.results = {}

self.n_jobs = n_jobs
if not isinstance(n_jobs, (int, type(None))) or n_jobs == 0:
raise ValueError('n_jobs must be a non-zero integer or None. n_jobs is set to `{}`.'.format(n_jobs))

self.parameters = {}
for component in self.component_list:
self.parameters.update(component.parameters)
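
The check above only rejects 0 and non-integer values; how negative values translate into worker counts is left to the underlying estimators. A rough sketch of the joblib-style convention the docstrings describe, offered as an assumption rather than evalml's own code:

import os

def effective_workers(n_jobs):
    # joblib-style convention assumed here: None or 1 -> one worker,
    # -1 -> all CPUs, other negatives -> n_cpus + 1 + n_jobs
    n_cpus = os.cpu_count() or 1
    if n_jobs is None:
        return 1
    if n_jobs < 0:
        return max(1, n_cpus + 1 + n_jobs)
    return n_jobs

effective_workers(-1)  # all CPUs
effective_workers(-2)  # all CPUs but one
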
2 changes: 1 addition & 1 deletion evalml/pipelines/regression/catboost.py
@@ -35,5 +35,5 @@ def __init__(self, objective, impute_strategy, n_estimators, eta,
random_state=random_state)
super().__init__(objective=objective,
component_list=[imputer, estimator],
n_jobs=1,
n_jobs=n_jobs,
random_state=random_state)
6 changes: 4 additions & 2 deletions evalml/tests/automl_tests/test_auto_classification_search.py
@@ -24,7 +24,9 @@
def test_init(X_y):
X, y = X_y

automl = AutoClassificationSearch(multiclass=False, max_pipelines=1)
automl = AutoClassificationSearch(multiclass=False, max_pipelines=1, n_jobs=4)

assert automl.n_jobs == 4

# check loads all pipelines
assert get_pipelines(problem_type=ProblemTypes.BINARY) == automl.possible_pipelines
@@ -34,7 +36,7 @@ def test_init(X_y):
assert isinstance(automl.rankings, pd.DataFrame)
assert isinstance(automl.best_pipeline, PipelineBase)
assert isinstance(automl.best_pipeline.feature_importances, pd.DataFrame)

assert automl.best_pipeline.n_jobs == 4
# test with dataframes
automl.search(pd.DataFrame(X), pd.Series(y))

5 changes: 4 additions & 1 deletion evalml/tests/automl_tests/test_auto_regression_search.py
@@ -18,7 +18,9 @@ def X_y():
def test_init(X_y):
X, y = X_y

automl = AutoRegressionSearch(objective="R2", max_pipelines=3)
automl = AutoRegressionSearch(objective="R2", max_pipelines=3, n_jobs=4)

assert automl.n_jobs == 4

# check loads all pipelines
assert get_pipelines(problem_type=ProblemTypes.REGRESSION) == automl.possible_pipelines
@@ -29,6 +31,7 @@ def test_init(X_y):

assert isinstance(automl.best_pipeline, PipelineBase)
assert isinstance(automl.best_pipeline.feature_importances, pd.DataFrame)
assert automl.best_pipeline.n_jobs == 4

# test with dataframes
automl.search(pd.DataFrame(X), pd.Series(y), raise_errors=True)
19 changes: 19 additions & 0 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -166,3 +166,22 @@ def test_multiple_feature_selectors(X_y):
clf.fit(X, y)
clf.score(X, y)
assert not clf.feature_importances.isnull().all().all()


def test_n_jobs(X_y):
with pytest.raises(ValueError, match='n_jobs must be a non-zero integer*.'):
PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs='5', random_state=0)

with pytest.raises(ValueError, match='n_jobs must be a non-zero integer*.'):
PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=0, random_state=0)

assert PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=-4, random_state=0)

assert PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=4, random_state=0)

assert PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=None, random_state=0)