Standardize error when calling transform/predict before fit for pipelines (#1048)

* init

* add metaclass subclasses

* remove stored err

* add test

* update file hierarchy
angela97lin committed Aug 17, 2020
1 parent 2ecf3bd commit c0ad9f8
Showing 10 changed files with 162 additions and 65 deletions.
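In practice, this commit makes an unfitted pipeline fail the same way an unfitted component does: calling predict, predict_proba, or transform (or accessing feature_importance) before fit now raises PipelineNotYetFittedError instead of whatever error happened to bubble up from the underlying components. A minimal sketch of the new behavior, assuming the breast cancer demo dataset; MyPipeline and its component_graph entries are hypothetical and not taken from this diff:

from evalml.demos import load_breast_cancer
from evalml.exceptions import PipelineNotYetFittedError
from evalml.pipelines import BinaryClassificationPipeline


class MyPipeline(BinaryClassificationPipeline):
    """Hypothetical minimal pipeline; the component names are assumptions."""
    component_graph = ['Simple Imputer', 'Logistic Regression Classifier']


X, y = load_breast_cancer()
pipeline = MyPipeline(parameters={})

try:
    pipeline.predict(X)  # not fitted yet -> standardized error
except PipelineNotYetFittedError as err:
    print(err)  # "This MyPipeline is not fitted yet. You must fit MyPipeline before calling predict."

pipeline.fit(X, y)
predictions = pipeline.predict(X)  # succeeds once the pipeline is fitted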
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -8,6 +8,7 @@ Release Notes
* Added `explain_predictions` and `explain_predictions_best_worst` for explaining multiple predictions with SHAP :pr:`1016`
* Added new LSA component for text featurization :pr:`1022`
* Added guide on installing with conda :pr:`1041`
* Standardized error when calling transform/predict before fit for pipelines :pr:`1048`
* Fixes
* Updated TextFeaturizer component to no longer require an internet connection to run :pr:`1022`
* Fixed non-deterministic element of TextFeaturizer transformations :pr:`1022`
5 changes: 5 additions & 0 deletions evalml/exceptions/exceptions.py
@@ -28,6 +28,11 @@ class ComponentNotYetFittedError(Exception):
pass


class PipelineNotYetFittedError(Exception):
"""An exception to be raised when predict/predict_proba/transform is called on a pipeline without fitting first."""
pass


class AutoMLSearchException(Exception):
"""Exception raised when all pipelines in an automl batch return a score of NaN for the primary objective."""
pass
58 changes: 3 additions & 55 deletions evalml/pipelines/components/component_base.py
@@ -1,13 +1,10 @@
import copy
from abc import ABC, ABCMeta, abstractmethod
from functools import wraps
from abc import ABC, abstractmethod

import cloudpickle

from evalml.exceptions import (
ComponentNotYetFittedError,
MethodPropertyNotFoundError
)
from evalml.exceptions import MethodPropertyNotFoundError
from evalml.pipelines.components.component_base_meta import ComponentBaseMeta
from evalml.utils import (
classproperty,
get_logger,
@@ -18,55 +15,6 @@
logger = get_logger(__file__)


class ComponentBaseMeta(ABCMeta):
"""Metaclass that overrides creating a new component by wrapping method with validators and setters"""
from evalml.exceptions import ComponentNotYetFittedError

@classmethod
def set_fit(cls, method):
@wraps(method)
def _set_fit(self, X, y=None):
return_value = method(self, X, y)
self._is_fitted = True
return return_value
return _set_fit

@classmethod
def check_for_fit(cls, method):
"""`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`.
It raises an exception if `False` and calls and returns the wrapped method if `True`.
"""
@wraps(method)
def _check_for_fit(self, X=None, y=None):
klass = type(self).__name__
if not self._is_fitted and self.needs_fitting:
raise ComponentNotYetFittedError(f'This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.')
elif X is None and y is None:
return method(self)
elif y is None:
return method(self, X)
else:
return method(self, X, y)
return _check_for_fit

def __new__(cls, name, bases, dct):
if 'predict' in dct:
dct['predict'] = cls.check_for_fit(dct['predict'])
if 'predict_proba' in dct:
dct['predict_proba'] = cls.check_for_fit(dct['predict_proba'])
if 'transform' in dct:
dct['transform'] = cls.check_for_fit(dct['transform'])
if 'feature_importance' in dct:
fi = dct['feature_importance']
new_fi = property(cls.check_for_fit(fi.__get__), fi.__set__, fi.__delattr__)
dct['feature_importance'] = new_fi
if 'fit' in dct:
dct['fit'] = cls.set_fit(dct['fit'])
if 'fit_transform' in dct:
dct['fit_transform'] = cls.set_fit(dct['fit_transform'])
return super().__new__(cls, name, bases, dct)


class ComponentBase(ABC, metaclass=ComponentBaseMeta):
"""Base class for all components."""
_default_parameters = None
28 changes: 28 additions & 0 deletions evalml/pipelines/components/component_base_meta.py
@@ -0,0 +1,28 @@


from functools import wraps

from evalml.exceptions import ComponentNotYetFittedError
from evalml.utils.base_meta import BaseMeta


class ComponentBaseMeta(BaseMeta):
"""Metaclass that overrides creating a new component by wrapping methods with validators and setters"""

@classmethod
def check_for_fit(cls, method):
"""`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`.
It raises an exception if `False` and calls and returns the wrapped method if `True`.
"""
@wraps(method)
def _check_for_fit(self, X=None, y=None):
klass = type(self).__name__
if not self._is_fitted and self.needs_fitting:
raise ComponentNotYetFittedError(f'This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.')
elif X is None and y is None:
return method(self)
elif y is None:
return method(self, X)
else:
return method(self, X, y)
return _check_for_fit
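The reason _check_for_fit branches on whether X and y were passed is that the same wrapper guards a zero-argument property getter (feature_importance), one-argument methods (predict(X), transform(X)), and two-argument methods (fit_transform(X, y)). A standalone sketch of that dispatch, using hypothetical names and a plain RuntimeError in place of evalml's exception:

from functools import wraps


def check_for_fit(method):
    # Same shape as the wrapper above: guard first, then forward based on
    # which arguments were supplied.
    @wraps(method)
    def _check_for_fit(self, X=None, y=None):
        if not self._is_fitted:
            raise RuntimeError(f"Fit before calling {method.__name__}.")
        if X is None and y is None:
            return method(self)        # e.g. a feature_importance property getter
        if y is None:
            return method(self, X)     # e.g. predict(X) or transform(X)
        return method(self, X, y)      # e.g. fit_transform(X, y)
    return _check_for_fit


class DemoComponent:
    def __init__(self):
        self._is_fitted = True  # pretend fit has already run

    @check_for_fit
    def transform(self, X):
        return X

    @check_for_fit
    def fit_transform(self, X, y):
        return X


demo = DemoComponent()
demo.transform([1, 2])               # forwarded as method(self, X)
demo.fit_transform([1, 2], [0, 1])   # forwarded as method(self, X, y)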
4 changes: 3 additions & 1 deletion evalml/pipelines/pipeline_base.py
@@ -18,6 +18,7 @@
MissingComponentError,
PipelineScoreError
)
from evalml.pipelines.pipeline_base_meta import PipelineBaseMeta
from evalml.utils import (
classproperty,
get_logger,
@@ -30,7 +31,7 @@
logger = get_logger(__file__)


class PipelineBase(ABC):
class PipelineBase(ABC, metaclass=PipelineBaseMeta):
"""Base class for all pipelines."""

@property
@@ -67,6 +68,7 @@ def __init__(self, parameters, random_state=0):
raise ValueError("A pipeline must have an Estimator as the last component in component_graph.")

self._validate_estimator_problem_type()
self._is_fitted = False

@classproperty
def name(cls):
28 changes: 28 additions & 0 deletions evalml/pipelines/pipeline_base_meta.py
@@ -0,0 +1,28 @@


from functools import wraps

from evalml.exceptions import PipelineNotYetFittedError
from evalml.utils.base_meta import BaseMeta


class PipelineBaseMeta(BaseMeta):
"""Metaclass that overrides creating a new pipeline by wrapping methods with validators and setters"""

@classmethod
def check_for_fit(cls, method):
"""`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`.
It raises an exception if `False` and calls and returns the wrapped method if `True`.
"""
@wraps(method)
def _check_for_fit(self, X=None, y=None):
klass = type(self).__name__
if not self._is_fitted:
raise PipelineNotYetFittedError(f'This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.')
elif X is None and y is None:
return method(self)
elif y is None:
return method(self, X)
else:
return method(self, X, y)
return _check_for_fit
7 changes: 7 additions & 0 deletions evalml/tests/conftest.py
@@ -160,6 +160,10 @@ class MockEstimator(Estimator):

def __init__(self, random_state=0):
super().__init__(parameters={}, component_obj=None, random_state=random_state)

def fit(self, X, y):
return self

return MockEstimator


@@ -196,6 +200,9 @@ class MockRegressor(Estimator):
def __init__(self, random_state=0):
super().__init__(parameters={}, component_obj=None, random_state=random_state)

def fit(self, X, y):
return self

return MockRegressor


@@ -8,16 +8,15 @@
@patch('evalml.objectives.BinaryClassificationObjective.decision_function')
@patch('evalml.pipelines.components.Estimator.predict_proba')
@patch('evalml.pipelines.components.Estimator.predict')
@patch('evalml.pipelines.ClassificationPipeline.fit')
def test_binary_classification_pipeline_predict(mock_fit,
mock_predict, mock_predict_proba,
def test_binary_classification_pipeline_predict(mock_predict, mock_predict_proba,
mock_obj_decision, mock_decode,
X_y_binary, dummy_binary_pipeline_class):
mock_objs = [mock_decode, mock_predict]
mock_decode.return_value = [0, 1]
X, y = X_y_binary
binary_pipeline = dummy_binary_pipeline_class(parameters={})
# test no objective passed and no custom threshold uses underlying estimator's predict method
binary_pipeline.fit(X, y)
binary_pipeline.predict(X)
for mock_obj in mock_objs:
mock_obj.assert_called()
@@ -64,6 +63,7 @@ def test_binary_classification_pipeline_predict(mock_fit,
def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y_binary, dummy_binary_pipeline_class):
X, y = X_y_binary
binary_pipeline = dummy_binary_pipeline_class(parameters={})
binary_pipeline.fit(X, y)
with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
binary_pipeline.predict(X, "precision_micro")
mock_transform.assert_called()
56 changes: 50 additions & 6 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -9,9 +9,9 @@

from evalml.demos import load_breast_cancer, load_wine
from evalml.exceptions import (
ComponentNotYetFittedError,
IllFormattedClassNameError,
MissingComponentError,
PipelineNotYetFittedError,
PipelineScoreError
)
from evalml.model_family import ModelFamily
@@ -536,8 +536,8 @@ class MockMulticlassClassificationPipeline(MulticlassClassificationPipeline):

@patch('evalml.pipelines.RegressionPipeline.fit')
@patch('evalml.pipelines.RegressionPipeline.predict')
def test_score_regression_single(mock_predict, mock_fit, X_y_binary):
X, y = X_y_binary
def test_score_regression_single(mock_predict, mock_fit, X_y_regression):
X, y = X_y_regression
mock_predict.return_value = y
clf = make_mock_regression_pipeline()
clf.fit(X, y)
@@ -810,9 +810,13 @@ def test_score_with_objective_that_requires_predict_proba(mock_predict, dummy_re
mock_predict.return_value = np.array([1] * 100)
# Using pytest.raises to make sure we error if an error is not thrown.
with pytest.raises(PipelineScoreError):
dummy_regression_pipeline_class(parameters={}).score(X, y, ['precision', 'auc'])
clf = dummy_regression_pipeline_class(parameters={})
clf.fit(X, y)
clf.score(X, y, ['precision', 'auc'])
try:
dummy_regression_pipeline_class(parameters={}).score(X, y, ['precision', 'auc'])
clf = dummy_regression_pipeline_class(parameters={})
clf.fit(X, y)
clf.score(X, y, ['precision', 'auc'])
except PipelineScoreError as e:
assert "Invalid objective AUC specified for problem type Regression" in e.message
assert "Invalid objective Precision specified for problem type Regression" in e.message
@@ -919,7 +923,7 @@ def test_clone_fitted(X_y_binary, logistic_regression_binary_pipeline_class):
pipeline_clone = pipeline.clone(random_state=42)
assert pipeline_clone.random_state.randint(2**30) == random_state_first_val
assert pipeline.parameters == pipeline_clone.parameters
with pytest.raises(ComponentNotYetFittedError):
with pytest.raises(PipelineNotYetFittedError):
pipeline_clone.predict(X)
pipeline_clone.fit(X, y)
X_t_clone = pipeline_clone.predict_proba(X)
@@ -1017,3 +1021,43 @@ def test_targets_data_types_classification_pipelines(problem_type, target_type,
assert set(predictions.unique()).issubset(unique_vals)
predict_proba = pipeline.predict_proba(X)
assert set(predict_proba.columns) == set(unique_vals)


@patch('evalml.pipelines.PipelineBase.fit')
@pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS, ProblemTypes.REGRESSION])
def test_pipeline_not_fitted_error(mock_fit, problem_type, X_y_binary, X_y_multi, X_y_regression,
logistic_regression_binary_pipeline_class,
logistic_regression_multiclass_pipeline_class,
linear_regression_pipeline_class):
if problem_type == ProblemTypes.BINARY:
X, y = X_y_binary
clf = logistic_regression_binary_pipeline_class(parameters={})
elif problem_type == ProblemTypes.MULTICLASS:
X, y = X_y_multi
clf = logistic_regression_multiclass_pipeline_class(parameters={})
elif problem_type == ProblemTypes.REGRESSION:
X, y = X_y_regression
clf = linear_regression_pipeline_class(parameters={})

with pytest.raises(PipelineNotYetFittedError):
clf.predict(X)
with pytest.raises(PipelineNotYetFittedError):
clf.feature_importance

if problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
with pytest.raises(PipelineNotYetFittedError):
clf.predict_proba(X)

clf.fit(X, y)
if problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
with patch('evalml.pipelines.ClassificationPipeline.predict') as mock_predict:
clf.predict(X)
mock_predict.assert_called()
with patch('evalml.pipelines.ClassificationPipeline.predict_proba') as mock_predict_proba:
clf.predict_proba(X)
mock_predict_proba.assert_called()
else:
with patch('evalml.pipelines.RegressionPipeline.predict') as mock_predict:
clf.predict(X)
mock_predict.assert_called()
clf.feature_importance
34 changes: 34 additions & 0 deletions evalml/utils/base_meta.py
@@ -0,0 +1,34 @@


from abc import ABCMeta
from functools import wraps


class BaseMeta(ABCMeta):
"""Metaclass that overrides creating a new component or pipeline by wrapping methods with validators and setters"""

@classmethod
def set_fit(cls, method):
@wraps(method)
def _set_fit(self, X, y=None):
return_value = method(self, X, y)
self._is_fitted = True
return return_value
return _set_fit

def __new__(cls, name, bases, dct):
if 'predict' in dct:
dct['predict'] = cls.check_for_fit(dct['predict'])
if 'predict_proba' in dct:
dct['predict_proba'] = cls.check_for_fit(dct['predict_proba'])
if 'transform' in dct:
dct['transform'] = cls.check_for_fit(dct['transform'])
if 'feature_importance' in dct:
fi = dct['feature_importance']
new_fi = property(cls.check_for_fit(fi.__get__), fi.__set__, fi.__delattr__)
dct['feature_importance'] = new_fi
if 'fit' in dct:
dct['fit'] = cls.set_fit(dct['fit'])
if 'fit_transform' in dct:
dct['fit_transform'] = cls.set_fit(dct['fit_transform'])
return super().__new__(cls, name, bases, dct)
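Taken together, BaseMeta.__new__ rewrites the class dictionary at class-creation time, so every ComponentBase or PipelineBase subclass gets the fitted-state guard without decorating anything itself, and set_fit flips _is_fitted only after fit returns successfully. A self-contained sketch of the same pattern, with hypothetical names and a plain RuntimeError standing in for evalml's exceptions:

from abc import ABCMeta
from functools import wraps


class DemoMeta(ABCMeta):
    """Wraps fit with a flag-setter and predict with a fitted-check at class creation."""

    @classmethod
    def set_fit(cls, method):
        @wraps(method)
        def _set_fit(self, X, y=None):
            return_value = method(self, X, y)
            self._is_fitted = True  # only flipped after fit succeeds
            return return_value
        return _set_fit

    @classmethod
    def check_for_fit(cls, method):
        @wraps(method)
        def _check_for_fit(self, X):
            if not self._is_fitted:
                raise RuntimeError(f"Fit before calling {method.__name__}.")
            return method(self, X)
        return _check_for_fit

    def __new__(cls, name, bases, dct):
        if 'fit' in dct:
            dct['fit'] = cls.set_fit(dct['fit'])
        if 'predict' in dct:
            dct['predict'] = cls.check_for_fit(dct['predict'])
        return super().__new__(cls, name, bases, dct)


class DemoEstimator(metaclass=DemoMeta):
    def __init__(self):
        self._is_fitted = False

    def fit(self, X, y=None):
        return self

    def predict(self, X):
        return X


est = DemoEstimator()
try:
    est.predict([1, 2])     # raises: not fitted yet
except RuntimeError as err:
    print(err)
est.fit([1, 2], [0, 1])
print(est.predict([1, 2]))  # works after fit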
