Added _pipelines_per_batch as a private argument to AutoMLSearch #1355

Merged: 2 commits, Oct 27, 2020
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -14,6 +14,7 @@ Release Notes
 * Updated ``AutoMLSearch`` to support ``Woodwork`` data structures :pr:`1299`
 * Added cv_folds to ``ClassImbalanceDataCheck`` and added this check to ``DefaultDataChecks`` :pr:`1333`
 * Make ``max_batches`` argument to ``AutoMLSearch.search`` public :pr:`1320`
+* Added ``_pipelines_per_batch`` as a private argument to ``AutoMLSearch`` :pr:`1355`
 * Fixes
 * Fixed ML performance issue with ordered datasets: always shuffle data in automl's default CV splits :pr:`1265`
 * Fixed broken ``evalml info`` CLI command :pr:`1293`
10 changes: 6 additions & 4 deletions evalml/automl/automl_search.py
@@ -89,7 +89,8 @@ def __init__(self,
 verbose=True,
 optimize_thresholds=False,
 ensembling=False,
-max_batches=None):
+max_batches=None,
+_pipelines_per_batch=5):
 """Automated pipeline search

 Arguments:
@@ -149,6 +150,9 @@ def __init__(self,

 max_batches (int): The maximum number of batches of pipelines to search. Parameters max_time, and
 max_iterations have precedence over stopping the search.
+
+_pipelines_per_batch (int): The number of pipelines to train for every batch after the first one.
+The first batch will train a baseline pipeline + one of each pipeline family allowed in the search.
 """
 try:
 self.problem_type = handle_problem_types(problem_type)
@@ -191,9 +195,7 @@ def __init__(self,
 if max_batches is not None and max_batches <= 0:
 raise ValueError(f"Parameter max batches must be None or non-negative. Received {max_batches}.")
 self.max_batches = max_batches
-# This is the default value for IterativeAlgorithm - setting this explicitly makes sure that
-# the behavior of max_batches does not break if IterativeAlgorithm is changed.
-self._pipelines_per_batch = 5
+self._pipelines_per_batch = _pipelines_per_batch

 self.max_iterations = max_iterations
 if not self.max_iterations and not self.max_time and not self.max_batches:
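As a quick illustration of the new argument, here is a minimal usage sketch (not part of this diff): the synthetic sklearn dataset and the _pipelines_per_batch value of 3 are chosen only for the example, and the expected count assumes the search runs all batches to completion.

from sklearn.datasets import make_classification
from evalml.automl import AutoMLSearch

# Placeholder binary classification data for illustration only.
X, y = make_classification(n_samples=100, n_classes=2, random_state=0)

# With max_batches=2 and the private _pipelines_per_batch=3, the first batch trains
# a baseline pipeline plus one pipeline per allowed pipeline family, and the second
# batch trains 3 more pipelines.
automl = AutoMLSearch(problem_type='binary', max_batches=2, _pipelines_per_batch=3)
automl.search(X, y)
expected = 1 + len(automl.allowed_pipelines) + (2 - 1) * 3
assert len(automl.full_rankings) == expected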
30 changes: 30 additions & 0 deletions evalml/tests/automl_tests/test_automl.py
@@ -1458,3 +1458,33 @@ def test_input_not_woodwork_logs_warning(mock_fit, mock_score, caplog, X_y_binar
 automl.search(X, y)
 assert "`X` passed was not a DataTable. EvalML will try to convert the input as a Woodwork DataTable and types will be inferred. To control this behavior, please pass in a Woodwork DataTable instead." in caplog.text
 assert "`y` passed was not a DataColumn. EvalML will try to convert the input as a Woodwork DataTable and types will be inferred. To control this behavior, please pass in a Woodwork DataTable instead." in caplog.text


+@patch('evalml.pipelines.BinaryClassificationPipeline.score', return_value={"Log Loss Binary": 0.8})
+@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
+def test_pipelines_per_batch(mock_fit, mock_score, X_y_binary):
+    def total_pipelines(automl, num_batches, batch_size):
+        total = 1 + len(automl.allowed_pipelines)
+        total += ((num_batches - 1) * batch_size)
+        return total
Contributor review comment on the helper above: This is neat, nice to see this math condensed

+    X, y = X_y_binary
+
+    # Checking for default of _pipelines_per_batch
+    automl = AutoMLSearch(problem_type='binary', max_batches=2)
+    automl.search(X, y)
+    assert automl._pipelines_per_batch == 5
+    assert automl._automl_algorithm.pipelines_per_batch == 5
+    assert total_pipelines(automl, 2, 5) == len(automl.full_rankings)
+
+    automl = AutoMLSearch(problem_type='binary', max_batches=1, _pipelines_per_batch=2)
+    automl.search(X, y)
+    assert automl._pipelines_per_batch == 2
+    assert automl._automl_algorithm.pipelines_per_batch == 2
+    assert total_pipelines(automl, 1, 2) == len(automl.full_rankings)
+
+    automl = AutoMLSearch(problem_type='binary', max_batches=2, _pipelines_per_batch=10)
+    automl.search(X, y)
+    assert automl._pipelines_per_batch == 10
+    assert automl._automl_algorithm.pipelines_per_batch == 10
+    assert total_pipelines(automl, 2, 10) == len(automl.full_rankings)
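
To make the count used by total_pipelines concrete, here is a small worked example; the number of allowed pipeline families is hypothetical, since the real value depends on the problem type and evalml version.

# Hypothetical: 6 allowed pipeline families, max_batches=2, default _pipelines_per_batch=5
num_allowed_pipelines = 6                # hypothetical count, varies by problem type / version
num_batches, pipelines_per_batch = 2, 5
total = 1 + num_allowed_pipelines + (num_batches - 1) * pipelines_per_batch
print(total)  # 12: baseline + one pipeline per family in batch 1, then 5 more in batch 2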