
Add n_jobs as parameter for AutoML #403

Merged
merged 21 commits into master from 382_n_jobs on Feb 26, 2020
Changes from all commits
21 commits
594e627
Add n_jobs as parameter
jeremyliweishih Feb 20, 2020
078fe85
Add tests
jeremyliweishih Feb 20, 2020
0360939
Changelog
jeremyliweishih Feb 20, 2020
5bf11b0
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 20, 2020
c3f3fe6
Merge branch 'master' of https://github.com/FeatureLabs/evalml into 3…
jeremyliweishih Feb 20, 2020
b6f3ad5
Standardize defaults and make sure n_jobs passed in
jeremyliweishih Feb 24, 2020
574cbd1
Add tests ensuring n_jobs gets passed down
jeremyliweishih Feb 24, 2020
b083e59
Add validation for n_jobs
jeremyliweishih Feb 24, 2020
ce151f0
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 24, 2020
31b30ed
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 24, 2020
f8cc1a6
Merge branch 'master' of https://github.com/FeatureLabs/evalml into 3…
jeremyliweishih Feb 25, 2020
54a5cbe
Add 0 case for n_jobs validation and test
jeremyliweishih Feb 25, 2020
d456c76
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 25, 2020
c9882cd
Add none case and make logic prettier
jeremyliweishih Feb 25, 2020
eb618c6
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 25, 2020
5aaad35
Lint
jeremyliweishih Feb 25, 2020
2d45f6a
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 25, 2020
f182f6e
Merge branch 'master' into 382_n_jobs
jeremyliweishih Feb 26, 2020
858ed6f
Update docstrings
jeremyliweishih Feb 26, 2020
5ede776
Merge branch '382_n_jobs' of https://github.com/FeatureLabs/evalml in…
jeremyliweishih Feb 26, 2020
2e87415
lint
jeremyliweishih Feb 26, 2020
1 change: 1 addition & 0 deletions docs/source/changelog.rst
@@ -7,6 +7,7 @@ Changelog
* Added emacs buffers to .gitignore :pr:`350`
* Add CatBoost (gradient-boosted trees) classification and regression components and pipelines :pr:`247`
* Added Tuner abstract base class :pr:`351`
* Added n_jobs as parameter for AutoClassificationSearch and AutoRegressionSearch :pr:`403`
* Fixes
* Fixed ROC and confusion matrix plots not being calculated if user passed own additional_objectives :pr:`276`
* Changes
7 changes: 5 additions & 2 deletions evalml/automl/auto_base.py
@@ -24,7 +24,7 @@ class AutoBase:

def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
patience, tolerance, model_types, detect_label_leakage, start_iteration_callback,
add_result_callback, additional_objectives, random_state, verbose):
add_result_callback, additional_objectives, random_state, n_jobs, verbose):
if tuner is None:
tuner = SKOptTuner
self.objective = get_objective(objective)
@@ -72,9 +72,12 @@ def __init__(self, problem_type, tuner, cv, objective, max_pipelines, max_time,
'search_order': []
}
self.trained_pipelines = {}

self.random_state = random_state
random.seed(self.random_state)
np.random.seed(seed=self.random_state)

self.n_jobs = n_jobs
self.possible_model_types = list(set([p.model_type for p in self.possible_pipelines]))

self.tuners = {}
@@ -227,7 +230,7 @@ def _do_iteration(self, X, y, pbar, raise_errors):
pipeline = pipeline_class(
objective=self.objective,
random_state=self.random_state,
n_jobs=-1,
n_jobs=self.n_jobs,
number_features=X.shape[1],
**dict(parameters)
)
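
For orientation, the sketch below illustrates the plumbing this diff introduces: the search object stores the user-supplied n_jobs and forwards it to every pipeline it builds, replacing the previously hard-coded n_jobs=-1. This is a simplified, hypothetical illustration, not the actual evalml classes; the class and method names are invented for the sketch.

# Hypothetical sketch of the change in auto_base.py (not the real evalml code)
class SketchAutoSearch:
    def __init__(self, n_jobs=-1):
        self.n_jobs = n_jobs  # store the user-supplied value once on the search object

    def _build_pipeline(self, pipeline_class, parameters):
        # each pipeline now receives the search-level n_jobs
        # instead of the previously hard-coded -1
        return pipeline_class(n_jobs=self.n_jobs, **parameters)
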
5 changes: 5 additions & 0 deletions evalml/automl/auto_classification_search.py
@@ -25,6 +25,7 @@ def __init__(self,
add_result_callback=None,
additional_objectives=None,
random_state=0,
n_jobs=-1,
verbose=True):
"""Automated classifier pipeline search

@@ -68,6 +69,9 @@ def __init__(self,

random_state (int): the random_state

n_jobs (int or None): Non-zero integer describing level of parallelism used for pipelines.
None and 1 are equivalent. If set to -1, all CPUs are used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.

verbose (boolean): If True, turn verbosity on. Defaults to True
"""

@@ -99,6 +103,7 @@ def __init__(self,
add_result_callback=add_result_callback,
additional_objectives=additional_objectives,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose
)

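
A minimal usage sketch of the new parameter, assuming the top-level import shown here matches the evalml version of this PR and using a scikit-learn toy dataset for illustration:

from sklearn.datasets import load_breast_cancer
from evalml import AutoClassificationSearch

X, y = load_breast_cancer(return_X_y=True)

# limit each pipeline in the search to two parallel workers
automl = AutoClassificationSearch(max_pipelines=5, n_jobs=2)
automl.search(X, y)
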
5 changes: 5 additions & 0 deletions evalml/automl/auto_regression_search.py
@@ -24,6 +24,7 @@ def __init__(self,
add_result_callback=None,
additional_objectives=None,
random_state=0,
n_jobs=-1,
verbose=True):
"""Automated regressors pipeline search

@@ -65,6 +66,9 @@ def __init__(self,

random_state (int): the random_state

n_jobs (int or None): Non-zero integer describing level of parallelism used for pipelines.
None and 1 are equivalent. If set to -1, all CPUs are used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used.

verbose (boolean): If True, turn verbosity on. Defaults to True

"""
@@ -91,5 +95,6 @@ def __init__(self,
add_result_callback=add_result_callback,
additional_objectives=additional_objectives,
random_state=random_state,
n_jobs=n_jobs,
verbose=verbose
)
4 changes: 2 additions & 2 deletions evalml/pipelines/classification/catboost.py
@@ -25,7 +25,7 @@ class CatBoostClassificationPipeline(PipelineBase):

def __init__(self, objective, impute_strategy, n_estimators,
eta, max_depth, number_features, bootstrap_type=None,
n_jobs=1, random_state=0):
n_jobs=-1, random_state=0):
# note: impute_strategy must support both string and numeric data
imputer = SimpleImputer(impute_strategy=impute_strategy)
estimator = CatBoostClassifier(n_estimators=n_estimators,
@@ -35,5 +35,5 @@ def __init__(self, objective, impute_strategy, n_estimators,
random_state=random_state)
super().__init__(objective=objective,
component_list=[imputer, estimator],
n_jobs=1,
n_jobs=n_jobs,
random_state=random_state)
2 changes: 1 addition & 1 deletion evalml/pipelines/classification/logistic_regression.py
@@ -32,7 +32,7 @@ def __init__(self, objective, penalty, C, impute_strategy,
estimator = LogisticRegressionClassifier(random_state=random_state,
penalty=penalty,
C=C,
n_jobs=-1)
n_jobs=n_jobs)

super().__init__(objective=objective,
component_list=[enc, imputer, scaler, estimator],
4 changes: 4 additions & 0 deletions evalml/pipelines/pipeline_base.py
@@ -44,7 +44,11 @@ def __init__(self, objective, component_list, n_jobs, random_state):

self.name = self._generate_name() # autogenerated
self.results = {}

self.n_jobs = n_jobs
if not isinstance(n_jobs, (int, type(None))) or n_jobs == 0:
raise ValueError('n_jobs must be a non-zero integer or None. n_jobs is set to `{}`.'.format(n_jobs))

self.parameters = {}
for component in self.component_list:
self.parameters.update(component.parameters)
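
The check above only rejects 0 and non-integer values; how negative values translate into worker counts is left to the underlying estimators. A rough sketch of the joblib-style convention the docstrings describe, offered as an assumption rather than evalml's own code:

import os

def effective_workers(n_jobs):
    # joblib-style convention assumed here: None or 1 -> one worker,
    # -1 -> all CPUs, other negatives -> n_cpus + 1 + n_jobs
    n_cpus = os.cpu_count() or 1
    if n_jobs is None:
        return 1
    if n_jobs < 0:
        return max(1, n_cpus + 1 + n_jobs)
    return n_jobs

effective_workers(-1)  # all CPUs
effective_workers(-2)  # all CPUs but one
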
2 changes: 1 addition & 1 deletion evalml/pipelines/regression/catboost.py
@@ -35,5 +35,5 @@ def __init__(self, objective, impute_strategy, n_estimators, eta,
random_state=random_state)
super().__init__(objective=objective,
component_list=[imputer, estimator],
n_jobs=1,
n_jobs=n_jobs,
random_state=random_state)
6 changes: 4 additions & 2 deletions evalml/tests/automl_tests/test_auto_classification_search.py
@@ -24,7 +24,9 @@
def test_init(X_y):
X, y = X_y

automl = AutoClassificationSearch(multiclass=False, max_pipelines=1)
automl = AutoClassificationSearch(multiclass=False, max_pipelines=1, n_jobs=4)

assert automl.n_jobs == 4

# check loads all pipelines
assert get_pipelines(problem_type=ProblemTypes.BINARY) == automl.possible_pipelines
@@ -34,7 +36,7 @@ def test_init(X_y):
assert isinstance(automl.rankings, pd.DataFrame)
assert isinstance(automl.best_pipeline, PipelineBase)
assert isinstance(automl.best_pipeline.feature_importances, pd.DataFrame)

assert automl.best_pipeline.n_jobs == 4
# test with dataframes
automl.search(pd.DataFrame(X), pd.Series(y))

5 changes: 4 additions & 1 deletion evalml/tests/automl_tests/test_auto_regression_search.py
@@ -18,7 +18,9 @@ def X_y():
def test_init(X_y):
X, y = X_y

automl = AutoRegressionSearch(objective="R2", max_pipelines=3)
automl = AutoRegressionSearch(objective="R2", max_pipelines=3, n_jobs=4)

assert automl.n_jobs == 4

# check loads all pipelines
assert get_pipelines(problem_type=ProblemTypes.REGRESSION) == automl.possible_pipelines
@@ -29,6 +31,7 @@ def test_init(X_y):

assert isinstance(automl.best_pipeline, PipelineBase)
assert isinstance(automl.best_pipeline.feature_importances, pd.DataFrame)
assert automl.best_pipeline.n_jobs == 4

# test with dataframes
automl.search(pd.DataFrame(X), pd.Series(y), raise_errors=True)
19 changes: 19 additions & 0 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -166,3 +166,22 @@ def test_multiple_feature_selectors(X_y):
clf.fit(X, y)
clf.score(X, y)
assert not clf.feature_importances.isnull().all().all()


def test_n_jobs(X_y):
with pytest.raises(ValueError, match='n_jobs must be a non-zero integer*.'):
PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs='5', random_state=0)

with pytest.raises(ValueError, match='n_jobs must be a non-zero integer*.'):
PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=0, random_state=0)

assert PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=-4, random_state=0)

assert PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=4, random_state=0)

assert PipelineBase('precision', component_list=['Simple Imputer', 'One Hot Encoder', StandardScaler(), 'Logistic Regression Classifier'],
n_jobs=None, random_state=0)