
Have automl search raise config errors in init instead of search #933

Merged (11 commits) on Jul 16, 2020
docs/source/changelog.rst (1 addition, 0 deletions)
@@ -16,6 +16,7 @@ Changelog
     * Rename `master` branch to `main` :pr:`918`
     * Add pypi release github action :pr:`923`
     * Updated AutoMLSearch.search stdout output and logging and removed tqdm progress bar :pr:`921`
+    * Moved automl config checks previously in `search()` to init :pr:`933`
 * Documentation Changes
 * Testing Changes
     * Cleaned up fixture names and usages in tests :pr:`895`
evalml/automl/automl_search.py (5 additions, 11 deletions)
@@ -193,6 +193,8 @@ def __init__(self,
         self._automl_algorithm = None
         self._start = None
 
+        self._validate_problem_type()
+
     @property
     def data_check_results(self):
         return self._data_check_results
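The effect of calling `_validate_problem_type()` in `__init__` is that misconfiguration now fails fast at construction time, before any data is loaded. A minimal sketch of the new behavior (assuming `AutoMLSearch` is importable from `evalml.automl`, as exercised by the tests below):

from evalml.automl import AutoMLSearch

# With validation in __init__, an incompatible objective raises immediately
# at construction; previously the same error only surfaced inside search().
try:
    AutoMLSearch(problem_type='binary', objective='R2')
except ValueError as err:
    print(err)  # e.g. "... is not compatible with a binary problem."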
@@ -339,7 +341,6 @@ def search(self, X, y, data_checks="auto", feature_types=None, raise_errors=True
         logger.debug(f"allowed_pipelines set to {[pipeline.name for pipeline in self.allowed_pipelines]}")
         logger.debug(f"allowed_model_families set to {self.allowed_model_families}")
 
-        self._validate_problem_type()
         self._automl_algorithm = IterativeAlgorithm(
             max_pipelines=self.max_pipelines,
             allowed_pipelines=self.allowed_pipelines,
@@ -445,7 +446,7 @@ def _validate_problem_type(self):
             if obj.problem_type != self.problem_type:
                 raise ValueError("Additional objective {} is not compatible with a {} problem.".format(obj.name, self.problem_type.value))
 
-        for pipeline in self.allowed_pipelines:
+        for pipeline in self.allowed_pipelines or []:
             if not pipeline.problem_type == self.problem_type:
                 raise ValueError("Given pipeline {} is not compatible with problem_type {}.".format(pipeline.name, self.problem_type.value))

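The `or []` guard matters because validation now runs in `__init__`, where `allowed_pipelines` may still be None (the test below constructs `AutoMLSearch(problem_type='binary')` with no pipelines at all), and iterating None raises TypeError. A standalone illustration of the idiom, not evalml code:

allowed_pipelines = None

# `None or []` evaluates to [], so the loop body is simply skipped
# instead of raising "TypeError: 'NoneType' object is not iterable".
for pipeline in allowed_pipelines or []:
    pass  # never reached when allowed_pipelines is None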
@@ -483,15 +484,8 @@ def _compute_cv_scores(self, pipeline, X, y, raise_errors=True):
         logger.info("\tStarting cross validation")
         for i, (train, test) in enumerate(self.data_split.split(X, y)):
             logger.debug(f"\t\tTraining and scoring on fold {i}")
-            if isinstance(X, pd.DataFrame):
-                X_train, X_test = X.iloc[train], X.iloc[test]
-            else:
-                X_train, X_test = X[train], X[test]
-            if isinstance(y, pd.Series):
-                y_train, y_test = y.iloc[train], y.iloc[test]
-            else:
-                y_train, y_test = y[train], y[test]
-
+            X_train, X_test = X.iloc[train], X.iloc[test]
+            y_train, y_test = y.iloc[train], y.iloc[test]
             objectives_to_score = [self.objective] + self.additional_objectives
             try:
                 X_threshold_tuning = None
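The deleted isinstance branches tolerated numpy inputs; the replacement indexes with `.iloc` unconditionally, which assumes `X` and `y` are already pandas objects by the time folds are split (presumably coerced earlier in the call path). A short sketch of what the simplified code relies on:

import numpy as np
import pandas as pd

# .iloc does positional row selection, matching the integer fold
# indices produced by data_split.split(X, y).
X = pd.DataFrame(np.random.rand(10, 3))
y = pd.Series(np.random.randint(0, 2, size=10))

train, test = list(range(8)), [8, 9]
X_train, X_test = X.iloc[train], X.iloc[test]
y_train, y_test = y.iloc[train], y.iloc[test]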
evalml/tests/automl_tests/test_automl.py (8 additions, 17 deletions)
@@ -571,14 +571,15 @@ def test_large_dataset_regression(mock_score):
     assert automl.results['pipeline_results'][pipeline_id]['cv_data'][0]['score'] == 1.234
 
 
-def test_allowed_pipelines_with_incorrect_problem_type(X_y_regression, dummy_binary_pipeline_class):
-    X, y = X_y_regression
-    auto = AutoMLSearch(problem_type='regression', allowed_pipelines=[dummy_binary_pipeline_class])
+def test_allowed_pipelines_with_incorrect_problem_type(dummy_binary_pipeline_class):
+    # checks that not setting allowed_pipelines does not error out
+    AutoMLSearch(problem_type='binary')
 
     with pytest.raises(ValueError, match="is not compatible with problem_type"):
-        auto.search(X, y)
+        AutoMLSearch(problem_type='regression', allowed_pipelines=[dummy_binary_pipeline_class])
 
 
-def test_obj_matches_problem_type():
+def test_main_objective_problem_type_mismatch():
     with pytest.raises(ValueError, match="is not compatible with a"):
         AutoMLSearch(problem_type='binary', objective='R2')
 
@@ -609,20 +610,10 @@ def test_checks_at_search_time(mock_search, dummy_regression_pipeline_class, X_y
     with pytest.raises(ValueError, match=error_text):
         error_automl.search(X, y)
 
-    error_text = "in search, problem_type mismatches allowed_pipelines."
-    mock_search.side_effect = ValueError(error_text)
-
-    allowed_pipelines = [dummy_regression_pipeline_class]
-    error_automl = AutoMLSearch(problem_type='binary', allowed_pipelines=allowed_pipelines)
-    with pytest.raises(ValueError, match=error_text):
-        error_automl.search(X, y)
 
 
-def test_objective_at_search_time(X_y_multi):
-    X, y = X_y_multi
-    error_automl = AutoMLSearch(problem_type='multiclass', additional_objectives=['Precision', 'AUC'],)
+def test_incompatible_additional_objectives():
     with pytest.raises(ValueError, match="is not compatible with a "):
-        error_automl.search(X, y)
Comment on lines -621 to -625 (Contributor, Author): reworked this test a little by renaming it and updating it to check during initialization.

+        AutoMLSearch(problem_type='multiclass', additional_objectives=['Precision', 'AUC'])


 def test_default_objective():
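For downstream users, the upshot of this PR is that configuration errors can be handled before any data work begins. A hedged usage sketch (error text approximated from the `_validate_problem_type` diff above):

from evalml.automl import AutoMLSearch

try:
    automl = AutoMLSearch(problem_type='multiclass',
                          additional_objectives=['Precision', 'AUC'])
except ValueError as err:
    # e.g. "Additional objective Precision is not compatible with a multiclass problem."
    print(f"Invalid AutoML configuration: {err}")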