Standardize error when calling transform/predict before fit for pipelines (#1048)

* init

* add metaclass subclasses

* remove stored err

* add test

* update file hierarchy
angela97lin committed Aug 17, 2020
1 parent 2ecf3bd commit c0ad9f8
Showing 10 changed files with 162 additions and 65 deletions.
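In practice, this commit makes an unfitted pipeline fail the same way an unfitted component does: calling predict, predict_proba, or transform (or accessing feature_importance) before fit now raises PipelineNotYetFittedError instead of whatever error happened to bubble up from the underlying components. A minimal sketch of the new behavior, assuming the breast cancer demo dataset; MyPipeline and its component_graph entries are hypothetical and not taken from this diff:

from evalml.demos import load_breast_cancer
from evalml.exceptions import PipelineNotYetFittedError
from evalml.pipelines import BinaryClassificationPipeline


class MyPipeline(BinaryClassificationPipeline):
    """Hypothetical minimal pipeline; the component names are assumptions."""
    component_graph = ['Simple Imputer', 'Logistic Regression Classifier']


X, y = load_breast_cancer()
pipeline = MyPipeline(parameters={})

try:
    pipeline.predict(X)  # not fitted yet -> standardized error
except PipelineNotYetFittedError as err:
    print(err)  # "This MyPipeline is not fitted yet. You must fit MyPipeline before calling predict."

pipeline.fit(X, y)
predictions = pipeline.predict(X)  # succeeds once the pipeline is fitted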
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -8,6 +8,7 @@ Release Notes
* Added `explain_predictions` and `explain_predictions_best_worst` for explaining multiple predictions with SHAP :pr:`1016`
* Added new LSA component for text featurization :pr:`1022`
* Added guide on installing with conda :pr:`1041`
* Standardized error when calling transform/predict before fit for pipelines :pr:`1048`
* Fixes
* Updated TextFeaturizer component to no longer require an internet connection to run :pr:`1022`
* Fixed non-deterministic element of TextFeaturizer transformations :pr:`1022`
5 changes: 5 additions & 0 deletions evalml/exceptions/exceptions.py
@@ -28,6 +28,11 @@ class ComponentNotYetFittedError(Exception):
pass


class PipelineNotYetFittedError(Exception):
"""An exception to be raised when predict/predict_proba/transform is called on a pipeline without fitting first."""
pass


class AutoMLSearchException(Exception):
"""Exception raised when all pipelines in an automl batch return a score of NaN for the primary objective."""
pass
58 changes: 3 additions & 55 deletions evalml/pipelines/components/component_base.py
@@ -1,13 +1,10 @@
import copy
from abc import ABC, ABCMeta, abstractmethod
from functools import wraps
from abc import ABC, abstractmethod

import cloudpickle

from evalml.exceptions import (
ComponentNotYetFittedError,
MethodPropertyNotFoundError
)
from evalml.exceptions import MethodPropertyNotFoundError
from evalml.pipelines.components.component_base_meta import ComponentBaseMeta
from evalml.utils import (
classproperty,
get_logger,
@@ -18,55 +15,6 @@
logger = get_logger(__file__)


class ComponentBaseMeta(ABCMeta):
"""Metaclass that overrides creating a new component by wrapping method with validators and setters"""
from evalml.exceptions import ComponentNotYetFittedError

@classmethod
def set_fit(cls, method):
@wraps(method)
def _set_fit(self, X, y=None):
return_value = method(self, X, y)
self._is_fitted = True
return return_value
return _set_fit

@classmethod
def check_for_fit(cls, method):
"""`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`.
It raises an exception if `False` and calls and returns the wrapped method if `True`.
"""
@wraps(method)
def _check_for_fit(self, X=None, y=None):
klass = type(self).__name__
if not self._is_fitted and self.needs_fitting:
raise ComponentNotYetFittedError(f'This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.')
elif X is None and y is None:
return method(self)
elif y is None:
return method(self, X)
else:
return method(self, X, y)
return _check_for_fit

def __new__(cls, name, bases, dct):
if 'predict' in dct:
dct['predict'] = cls.check_for_fit(dct['predict'])
if 'predict_proba' in dct:
dct['predict_proba'] = cls.check_for_fit(dct['predict_proba'])
if 'transform' in dct:
dct['transform'] = cls.check_for_fit(dct['transform'])
if 'feature_importance' in dct:
fi = dct['feature_importance']
new_fi = property(cls.check_for_fit(fi.__get__), fi.__set__, fi.__delattr__)
dct['feature_importance'] = new_fi
if 'fit' in dct:
dct['fit'] = cls.set_fit(dct['fit'])
if 'fit_transform' in dct:
dct['fit_transform'] = cls.set_fit(dct['fit_transform'])
return super().__new__(cls, name, bases, dct)


class ComponentBase(ABC, metaclass=ComponentBaseMeta):
"""Base class for all components."""
_default_parameters = None
28 changes: 28 additions & 0 deletions evalml/pipelines/components/component_base_meta.py
@@ -0,0 +1,28 @@


from functools import wraps

from evalml.exceptions import ComponentNotYetFittedError
from evalml.utils.base_meta import BaseMeta


class ComponentBaseMeta(BaseMeta):
"""Metaclass that overrides creating a new component by wrapping methods with validators and setters"""

@classmethod
def check_for_fit(cls, method):
"""`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`.
It raises an exception if `False` and calls and returns the wrapped method if `True`.
"""
@wraps(method)
def _check_for_fit(self, X=None, y=None):
klass = type(self).__name__
if not self._is_fitted and self.needs_fitting:
raise ComponentNotYetFittedError(f'This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.')
elif X is None and y is None:
return method(self)
elif y is None:
return method(self, X)
else:
return method(self, X, y)
return _check_for_fit
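The reason _check_for_fit branches on whether X and y were passed is that the same wrapper guards a zero-argument property getter (feature_importance), one-argument methods (predict(X), transform(X)), and two-argument methods (fit_transform(X, y)). A standalone sketch of that dispatch, using hypothetical names and a plain RuntimeError in place of evalml's exception:

from functools import wraps


def check_for_fit(method):
    # Same shape as the wrapper above: guard first, then forward based on
    # which arguments were supplied.
    @wraps(method)
    def _check_for_fit(self, X=None, y=None):
        if not self._is_fitted:
            raise RuntimeError(f"Fit before calling {method.__name__}.")
        if X is None and y is None:
            return method(self)        # e.g. a feature_importance property getter
        if y is None:
            return method(self, X)     # e.g. predict(X) or transform(X)
        return method(self, X, y)      # e.g. fit_transform(X, y)
    return _check_for_fit


class DemoComponent:
    def __init__(self):
        self._is_fitted = True  # pretend fit has already run

    @check_for_fit
    def transform(self, X):
        return X

    @check_for_fit
    def fit_transform(self, X, y):
        return X


demo = DemoComponent()
demo.transform([1, 2])               # forwarded as method(self, X)
demo.fit_transform([1, 2], [0, 1])   # forwarded as method(self, X, y)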
4 changes: 3 additions & 1 deletion evalml/pipelines/pipeline_base.py
@@ -18,6 +18,7 @@
MissingComponentError,
PipelineScoreError
)
from evalml.pipelines.pipeline_base_meta import PipelineBaseMeta
from evalml.utils import (
classproperty,
get_logger,
@@ -30,7 +31,7 @@
logger = get_logger(__file__)


class PipelineBase(ABC):
class PipelineBase(ABC, metaclass=PipelineBaseMeta):
"""Base class for all pipelines."""

@property
@@ -67,6 +68,7 @@ def __init__(self, parameters, random_state=0):
raise ValueError("A pipeline must have an Estimator as the last component in component_graph.")

self._validate_estimator_problem_type()
self._is_fitted = False

@classproperty
def name(cls):
28 changes: 28 additions & 0 deletions evalml/pipelines/pipeline_base_meta.py
@@ -0,0 +1,28 @@


from functools import wraps

from evalml.exceptions import PipelineNotYetFittedError
from evalml.utils.base_meta import BaseMeta


class PipelineBaseMeta(BaseMeta):
"""Metaclass that overrides creating a new pipeline by wrapping methods with validators and setters"""

@classmethod
def check_for_fit(cls, method):
"""`check_for_fit` wraps a method that validates if `self._is_fitted` is `True`.
It raises an exception if `False` and calls and returns the wrapped method if `True`.
"""
@wraps(method)
def _check_for_fit(self, X=None, y=None):
klass = type(self).__name__
if not self._is_fitted:
raise PipelineNotYetFittedError(f'This {klass} is not fitted yet. You must fit {klass} before calling {method.__name__}.')
elif X is None and y is None:
return method(self)
elif y is None:
return method(self, X)
else:
return method(self, X, y)
return _check_for_fit
7 changes: 7 additions & 0 deletions evalml/tests/conftest.py
@@ -160,6 +160,10 @@ class MockEstimator(Estimator):

def __init__(self, random_state=0):
super().__init__(parameters={}, component_obj=None, random_state=random_state)

def fit(self, X, y):
return self

return MockEstimator


@@ -196,6 +200,9 @@ class MockRegressor(Estimator):
def __init__(self, random_state=0):
super().__init__(parameters={}, component_obj=None, random_state=random_state)

def fit(self, X, y):
return self

return MockRegressor


@@ -8,16 +8,15 @@
@patch('evalml.objectives.BinaryClassificationObjective.decision_function')
@patch('evalml.pipelines.components.Estimator.predict_proba')
@patch('evalml.pipelines.components.Estimator.predict')
@patch('evalml.pipelines.ClassificationPipeline.fit')
def test_binary_classification_pipeline_predict(mock_fit,
mock_predict, mock_predict_proba,
def test_binary_classification_pipeline_predict(mock_predict, mock_predict_proba,
mock_obj_decision, mock_decode,
X_y_binary, dummy_binary_pipeline_class):
mock_objs = [mock_decode, mock_predict]
mock_decode.return_value = [0, 1]
X, y = X_y_binary
binary_pipeline = dummy_binary_pipeline_class(parameters={})
# test no objective passed and no custom threshold uses underlying estimator's predict method
binary_pipeline.fit(X, y)
binary_pipeline.predict(X)
for mock_obj in mock_objs:
mock_obj.assert_called()
@@ -64,6 +63,7 @@ def test_binary_classification_pipeline_predict(mock_fit,
def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y_binary, dummy_binary_pipeline_class):
X, y = X_y_binary
binary_pipeline = dummy_binary_pipeline_class(parameters={})
binary_pipeline.fit(X, y)
with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
binary_pipeline.predict(X, "precision_micro")
mock_transform.assert_called()
56 changes: 50 additions & 6 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -9,9 +9,9 @@

from evalml.demos import load_breast_cancer, load_wine
from evalml.exceptions import (
ComponentNotYetFittedError,
IllFormattedClassNameError,
MissingComponentError,
PipelineNotYetFittedError,
PipelineScoreError
)
from evalml.model_family import ModelFamily
@@ -536,8 +536,8 @@ class MockMulticlassClassificationPipeline(MulticlassClassificationPipeline):

@patch('evalml.pipelines.RegressionPipeline.fit')
@patch('evalml.pipelines.RegressionPipeline.predict')
def test_score_regression_single(mock_predict, mock_fit, X_y_binary):
X, y = X_y_binary
def test_score_regression_single(mock_predict, mock_fit, X_y_regression):
X, y = X_y_regression
mock_predict.return_value = y
clf = make_mock_regression_pipeline()
clf.fit(X, y)
@@ -810,9 +810,13 @@ def test_score_with_objective_that_requires_predict_proba(mock_predict, dummy_re
mock_predict.return_value = np.array([1] * 100)
# Using pytest.raises to make sure we error if an error is not thrown.
with pytest.raises(PipelineScoreError):
dummy_regression_pipeline_class(parameters={}).score(X, y, ['precision', 'auc'])
clf = dummy_regression_pipeline_class(parameters={})
clf.fit(X, y)
clf.score(X, y, ['precision', 'auc'])
try:
dummy_regression_pipeline_class(parameters={}).score(X, y, ['precision', 'auc'])
clf = dummy_regression_pipeline_class(parameters={})
clf.fit(X, y)
clf.score(X, y, ['precision', 'auc'])
except PipelineScoreError as e:
assert "Invalid objective AUC specified for problem type Regression" in e.message
assert "Invalid objective Precision specified for problem type Regression" in e.message
@@ -919,7 +923,7 @@ def test_clone_fitted(X_y_binary, logistic_regression_binary_pipeline_class):
pipeline_clone = pipeline.clone(random_state=42)
assert pipeline_clone.random_state.randint(2**30) == random_state_first_val
assert pipeline.parameters == pipeline_clone.parameters
with pytest.raises(ComponentNotYetFittedError):
with pytest.raises(PipelineNotYetFittedError):
pipeline_clone.predict(X)
pipeline_clone.fit(X, y)
X_t_clone = pipeline_clone.predict_proba(X)
@@ -1017,3 +1021,43 @@ def test_targets_data_types_classification_pipelines(problem_type, target_type,
assert set(predictions.unique()).issubset(unique_vals)
predict_proba = pipeline.predict_proba(X)
assert set(predict_proba.columns) == set(unique_vals)


@patch('evalml.pipelines.PipelineBase.fit')
@pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS, ProblemTypes.REGRESSION])
def test_pipeline_not_fitted_error(mock_fit, problem_type, X_y_binary, X_y_multi, X_y_regression,
logistic_regression_binary_pipeline_class,
logistic_regression_multiclass_pipeline_class,
linear_regression_pipeline_class):
if problem_type == ProblemTypes.BINARY:
X, y = X_y_binary
clf = logistic_regression_binary_pipeline_class(parameters={})
elif problem_type == ProblemTypes.MULTICLASS:
X, y = X_y_multi
clf = logistic_regression_multiclass_pipeline_class(parameters={})
elif problem_type == ProblemTypes.REGRESSION:
X, y = X_y_regression
clf = linear_regression_pipeline_class(parameters={})

with pytest.raises(PipelineNotYetFittedError):
clf.predict(X)
with pytest.raises(PipelineNotYetFittedError):
clf.feature_importance

if problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
with pytest.raises(PipelineNotYetFittedError):
clf.predict_proba(X)

clf.fit(X, y)
if problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
with patch('evalml.pipelines.ClassificationPipeline.predict') as mock_predict:
clf.predict(X)
mock_predict.assert_called()
with patch('evalml.pipelines.ClassificationPipeline.predict_proba') as mock_predict_proba:
clf.predict_proba(X)
mock_predict_proba.assert_called()
else:
with patch('evalml.pipelines.RegressionPipeline.predict') as mock_predict:
clf.predict(X)
mock_predict.assert_called()
clf.feature_importance
34 changes: 34 additions & 0 deletions evalml/utils/base_meta.py
@@ -0,0 +1,34 @@


from abc import ABCMeta
from functools import wraps


class BaseMeta(ABCMeta):
"""Metaclass that overrides creating a new component or pipeline by wrapping methods with validators and setters"""

@classmethod
def set_fit(cls, method):
@wraps(method)
def _set_fit(self, X, y=None):
return_value = method(self, X, y)
self._is_fitted = True
return return_value
return _set_fit

def __new__(cls, name, bases, dct):
if 'predict' in dct:
dct['predict'] = cls.check_for_fit(dct['predict'])
if 'predict_proba' in dct:
dct['predict_proba'] = cls.check_for_fit(dct['predict_proba'])
if 'transform' in dct:
dct['transform'] = cls.check_for_fit(dct['transform'])
if 'feature_importance' in dct:
fi = dct['feature_importance']
new_fi = property(cls.check_for_fit(fi.__get__), fi.__set__, fi.__delattr__)
dct['feature_importance'] = new_fi
if 'fit' in dct:
dct['fit'] = cls.set_fit(dct['fit'])
if 'fit_transform' in dct:
dct['fit_transform'] = cls.set_fit(dct['fit_transform'])
return super().__new__(cls, name, bases, dct)
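Taken together, BaseMeta.__new__ rewrites the class dictionary at class-creation time, so every ComponentBase or PipelineBase subclass gets the fitted-state guard without decorating anything itself, and set_fit flips _is_fitted only after fit returns successfully. A self-contained sketch of the same pattern, with hypothetical names and a plain RuntimeError standing in for evalml's exceptions:

from abc import ABCMeta
from functools import wraps


class DemoMeta(ABCMeta):
    """Wraps fit with a flag-setter and predict with a fitted-check at class creation."""

    @classmethod
    def set_fit(cls, method):
        @wraps(method)
        def _set_fit(self, X, y=None):
            return_value = method(self, X, y)
            self._is_fitted = True  # only flipped after fit succeeds
            return return_value
        return _set_fit

    @classmethod
    def check_for_fit(cls, method):
        @wraps(method)
        def _check_for_fit(self, X):
            if not self._is_fitted:
                raise RuntimeError(f"Fit before calling {method.__name__}.")
            return method(self, X)
        return _check_for_fit

    def __new__(cls, name, bases, dct):
        if 'fit' in dct:
            dct['fit'] = cls.set_fit(dct['fit'])
        if 'predict' in dct:
            dct['predict'] = cls.check_for_fit(dct['predict'])
        return super().__new__(cls, name, bases, dct)


class DemoEstimator(metaclass=DemoMeta):
    def __init__(self):
        self._is_fitted = False

    def fit(self, X, y=None):
        return self

    def predict(self, X):
        return X


est = DemoEstimator()
try:
    est.predict([1, 2])     # raises: not fitted yet
except RuntimeError as err:
    print(err)
est.fit([1, 2], [0, 1])
print(est.predict([1, 2]))  # works after fit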
