Raise ValueError when predict/predict_proba input types don't match fit input (#3036)

* initial impl

* update release notes

* fix infer

* add tests and private method

* fix tests

* fix notebook

* update code

* remove unneeded files

* add test

* update test

* address comments
bchen1116 committed Nov 17, 2021
1 parent d6682a4 commit 401457c
Showing 7 changed files with 195 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -7,6 +7,7 @@ Release Notes
* Added support for ``scikit-learn > 1.0.0`` :pr:`3051`
* Required the ``date_index`` parameter to be specified for time series problems in ``AutoMLSearch`` :pr:`3041`
* Fixes
* Added an error message when fit and predict/predict_proba data types are different :pr:`3036`
* Fixed bug where ensembling components could not get converted to JSON format :pr:`3049`
* Fixed bug where components with tuned integer hyperparameters could not get converted to JSON format :pr:`3049`
* Changes
16 changes: 15 additions & 1 deletion evalml/pipelines/component_graph.py
@@ -15,7 +15,12 @@
)
from evalml.pipelines.components import ComponentBase, Estimator, Transformer
from evalml.pipelines.components.utils import handle_component_class
from evalml.utils import get_logger, import_or_raise, infer_feature_types
from evalml.utils import (
_schema_is_equal,
get_logger,
import_or_raise,
infer_feature_types,
)

logger = get_logger(__file__)

@@ -88,6 +93,7 @@ def __init__(self, component_dict=None, random_seed=0):
self._feature_provenance = {}
self._i = 0
self._compute_order = self.generate_order(self.component_dict)
self._input_types = {}

def _validate_component_dict(self):
for _, component_inputs in self.component_dict.items():
@@ -378,6 +384,14 @@ def _transform_features(
dict: Outputs from each component.
"""
        X = infer_feature_types(X)
        if not fit:
            if not _schema_is_equal(X.ww.schema, self._input_types):
                raise ValueError(
                    "Input X data types are different from the input types the pipeline was fitted on."
                )
        else:
            self._input_types = X.ww.schema

        if y is not None:
            y = infer_feature_types(y)

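For context, a minimal end-to-end sketch of the new behavior. This is not part of the diff: the tiny dataset and column names are illustrative, and it assumes the same evalml APIs exercised by the tests below.

import pandas as pd

from evalml.pipelines import BinaryClassificationPipeline
from evalml.utils import infer_feature_types

X = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [0.1, 0.2, 0.3, 0.4]})
y = pd.Series([0, 1, 0, 1])

X = infer_feature_types(X)
X.ww.set_types({"a": "Double"})

pipeline = BinaryClassificationPipeline(["Imputer", "Random Forest Classifier"])
pipeline.fit(X, y)  # the Woodwork schema of X is cached in ComponentGraph._input_types

X2 = infer_feature_types(X.copy())
X2.ww.set_types({"a": "Categorical"})  # retype a column after fitting

# Now raises ValueError: "Input X data types are different from the input
# types the pipeline was fitted on."
pipeline.predict(X2)
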
57 changes: 57 additions & 0 deletions evalml/tests/pipeline_tests/test_component_graph.py
@@ -35,6 +35,7 @@
Transformer,
Undersampler,
)
from evalml.problem_types import is_classification
from evalml.utils import infer_feature_types


@@ -2418,3 +2419,59 @@ def test_training_only_component_in_component_graph_transform_all_but_final(
component_graph.fit(X, y)
transformed_X = component_graph.transform_all_but_final(X, y)
assert len(transformed_X) == len(X)


@pytest.mark.parametrize("problem_type", ["binary", "multiclass", "regression"])
def test_fit_predict_different_types(
problem_type, X_y_binary, X_y_multi, X_y_regression
):
if problem_type == "binary":
X, y = X_y_binary
elif problem_type == "multiclass":
X, y = X_y_multi
else:
X, y = X_y_regression

X = infer_feature_types(X)
X.ww.set_types({0: "Double"})
X2 = infer_feature_types(X.copy())
X2.ww.set_types({0: "Categorical"})
if is_classification(problem_type):
component_dict = {
"Imputer": ["Imputer", "X", "y"],
"RF": [
"Random Forest Classifier",
"Imputer.x",
"y",
],
}
else:
component_dict = {
"Imputer": ["Imputer", "X", "y"],
"RF": [
"Random Forest Regressor",
"Imputer.x",
"y",
],
}
component_graph = ComponentGraph(component_dict).instantiate({})
component_graph.fit(X, y)
with pytest.raises(
ValueError, match="Input X data types are different from the input types"
):
component_graph.predict(X2)


def test_fit_transform_different_types(X_y_binary):
X, y = X_y_binary
X = infer_feature_types(X)
X.ww.set_types({0: "Double"})
X2 = infer_feature_types(X.copy())
X2.ww.set_types({0: "Categorical"})
component_dict = {"Imputer": ["Imputer", "X", "y"]}
component_graph = ComponentGraph(component_dict).instantiate({})
component_graph.fit(X, y)
with pytest.raises(
ValueError, match="Input X data types are different from the input types"
):
component_graph.transform(X2)
44 changes: 38 additions & 6 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -59,6 +59,7 @@
is_multiclass,
is_time_series,
)
from evalml.utils import infer_feature_types


@pytest.mark.parametrize(
@@ -646,7 +647,8 @@ def test_score_nonlinear_regression(

@patch("evalml.pipelines.BinaryClassificationPipeline.fit")
@patch("evalml.pipelines.components.Estimator.predict")
def test_score_binary_single(mock_predict, mock_fit, X_y_binary):
@patch("evalml.pipelines.component_graph._schema_is_equal", return_value=True)
def test_score_binary_single(mock_schema, mock_predict, mock_fit, X_y_binary):
X, y = X_y_binary
mock_predict.return_value = y
clf = make_mock_binary_pipeline()
@@ -660,7 +662,8 @@ def test_score_binary_single(mock_predict, mock_fit, X_y_binary):

@patch("evalml.pipelines.MulticlassClassificationPipeline.fit")
@patch("evalml.pipelines.components.Estimator.predict")
def test_score_multiclass_single(mock_predict, mock_fit, X_y_binary):
@patch("evalml.pipelines.component_graph._schema_is_equal", return_value=True)
def test_score_multiclass_single(mock_schema, mock_predict, mock_fit, X_y_binary):
X, y = X_y_binary
mock_predict.return_value = y
clf = make_mock_multiclass_pipeline()
@@ -702,7 +705,8 @@ def test_score_regression_list(mock_predict, mock_fit, X_y_binary):

@patch("evalml.pipelines.BinaryClassificationPipeline.fit")
@patch("evalml.pipelines.components.Estimator.predict")
def test_score_binary_list(mock_predict, mock_fit, X_y_binary):
@patch("evalml.pipelines.component_graph._schema_is_equal", return_value=True)
def test_score_binary_list(mock_schema, mock_predict, mock_fit, X_y_binary):
X, y = X_y_binary
mock_predict.return_value = y
clf = make_mock_binary_pipeline()
@@ -717,7 +721,8 @@ def test_score_binary_list(mock_predict, mock_fit, X_y_binary):
@patch("evalml.pipelines.MulticlassClassificationPipeline._encode_targets")
@patch("evalml.pipelines.MulticlassClassificationPipeline.fit")
@patch("evalml.pipelines.components.Estimator.predict")
def test_score_multi_list(mock_predict, mock_fit, mock_encode, X_y_binary):
@patch("evalml.pipelines.component_graph._schema_is_equal", return_value=True)
def test_score_multi_list(mock_schema, mock_predict, mock_fit, mock_encode, X_y_binary):
X, y = X_y_binary
mock_predict.return_value = y
mock_encode.return_value = y
@@ -756,8 +761,9 @@ def test_score_regression_objective_error(
@patch("evalml.objectives.F1.score")
@patch("evalml.pipelines.BinaryClassificationPipeline.fit")
@patch("evalml.pipelines.components.Estimator.predict")
@patch("evalml.pipelines.component_graph._schema_is_equal", return_value=True)
def test_score_binary_objective_error(
mock_predict, mock_fit, mock_objective_score, mock_encode, X_y_binary
mock_schema, mock_predict, mock_fit, mock_objective_score, mock_encode, X_y_binary
):
mock_objective_score.side_effect = Exception("finna kabooom 💣")
X, y = X_y_binary
@@ -809,8 +815,9 @@ def test_score_nonlinear_binary_objective_error(
@patch("evalml.objectives.F1Micro.score")
@patch("evalml.pipelines.MulticlassClassificationPipeline.fit")
@patch("evalml.pipelines.components.Estimator.predict")
@patch("evalml.pipelines.component_graph._schema_is_equal", return_value=True)
def test_score_multiclass_objective_error(
mock_predict, mock_fit, mock_objective_score, mock_encode, X_y_binary
mock_schema, mock_predict, mock_fit, mock_objective_score, mock_encode, X_y_binary
):
mock_objective_score.side_effect = Exception("finna kabooom 💣")
X, y = X_y_binary
@@ -2914,3 +2921,28 @@ def test_component_graph_pipeline_initialized():
]
== "median"
)


@pytest.mark.parametrize("problem_type", ["binary", "multiclass"])
def test_fit_predict_proba_types(problem_type, X_y_binary, X_y_multi):
component_graph = ["Imputer", "Random Forest Classifier"]
if problem_type == "binary":
pipeline = BinaryClassificationPipeline(component_graph)
X, y = X_y_binary
else:
pipeline = MulticlassClassificationPipeline(component_graph)
X, y = X_y_multi
X = infer_feature_types(X)
X.ww.set_types({0: "Double"})
X2 = infer_feature_types(X.copy())
X2.ww.set_types({0: "Categorical"})

pipeline.fit(X, y)
with pytest.raises(
ValueError, match="Input X data types are different from the input types"
):
pipeline.predict(X2)
with pytest.raises(
ValueError, match="Input X data types are different from the input types"
):
pipeline.predict_proba(X2)
62 changes: 60 additions & 2 deletions evalml/tests/utils_tests/test_woodwork_utils.py
@@ -4,9 +4,13 @@
import pandas as pd
import pytest
import woodwork as ww
from woodwork.logical_types import Double, Unknown
from woodwork.logical_types import URL, Categorical, Double, Integer, Unknown

from evalml.utils import _convert_numeric_dataset_pandas, infer_feature_types
from evalml.utils import (
_convert_numeric_dataset_pandas,
_schema_is_equal,
infer_feature_types,
)


def test_infer_feature_types_no_type_change():
@@ -252,3 +256,57 @@ def test_infer_feature_types_NA_to_nan(null_col, already_inited):
assert all([isinstance(x, type(np.nan)) for x in inferred_df["unknown"]])
else:
assert all([isinstance(x, str) for x in df["unknown"]])


@pytest.mark.parametrize(
"logical_types,l_equal",
[
({"first": Categorical(), "second": Integer(), "third": Double()}, True),
({"first": Categorical(), "second": Double(), "third": Double()}, True),
({"first": Categorical(), "second": Double(), "third": Categorical()}, False),
({"first": Categorical(), "second": Double(), "third": Unknown()}, False),
({"first": URL(), "second": Double(), "third": Categorical()}, False),
],
)
@pytest.mark.parametrize(
"semantic_tags,s_equal",
[
({"first": [], "second": ["numeric"], "third": ["numeric"]}, True),
({"first": [], "second": ["numeric"], "third": []}, False),
({"first": [], "second": ["numeric"], "third": ["random tag here"]}, False),
],
)
def test_schema_is_equal(semantic_tags, s_equal, logical_types, l_equal):
schema = ww.table_schema.TableSchema(
column_names=["first", "second", "third"],
logical_types={"first": Categorical(), "second": Integer(), "third": Double()},
semantic_tags={"first": [], "second": ["numeric"], "third": ["numeric"]},
)
schema_other = ww.table_schema.TableSchema(
column_names=["first", "second", "third"],
logical_types=logical_types,
semantic_tags=semantic_tags,
)
res = _schema_is_equal(schema, schema_other)
assert res == (l_equal and s_equal)


def test_schema_is_equal_column_names():
schema = ww.table_schema.TableSchema(
column_names=["first", "second"],
logical_types={"first": Categorical(), "second": Integer()},
semantic_tags={"first": [], "second": ["numeric"]},
)
schema2 = ww.table_schema.TableSchema(
column_names=["second", "first"],
logical_types={"first": Categorical(), "second": Integer()},
semantic_tags={"first": [], "second": ["numeric"]},
)
assert not _schema_is_equal(schema, schema2)


def test_schema_is_equal_fraud(fraud_100):
X, y = fraud_100
X2 = X.copy()
X2.ww.init()
assert _schema_is_equal(X.ww.schema, X2.ww.schema)
1 change: 1 addition & 0 deletions evalml/utils/__init__.py
@@ -29,4 +29,5 @@
from .woodwork_utils import (
infer_feature_types,
_convert_numeric_dataset_pandas,
_schema_is_equal,
)
23 changes: 23 additions & 0 deletions evalml/utils/woodwork_utils.py
@@ -132,3 +132,26 @@ def _convert_numeric_dataset_pandas(X, y):
)
y_ww = infer_feature_types(y)
return X_ww, y_ww


def _schema_is_equal(first, other):
    """Loosely check whether two Woodwork schemas are equivalent. Only the string representations of the types are compared (with Integer treated as Double); the actual logical type objects are not taken into account.

    Args:
        first (ww.TableSchema): The schema of the first Woodwork DataFrame.
        other (ww.TableSchema): The schema of the second Woodwork DataFrame.

    Returns:
        bool: Whether the two schemas are equal.
    """
    if first.types.index.tolist() != other.types.index.tolist():
        return False
    logical = [
        x if x != "Integer" else "Double"
        for x in first.types["Logical Type"].astype(str).tolist()
    ] == [
        x if x != "Integer" else "Double"
        for x in other.types["Logical Type"].astype(str).tolist()
    ]
    semantic = first.semantic_tags == other.semantic_tags
    return logical and semantic
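
For context, a usage sketch of the loose comparison; the schema names and types here are illustrative and mirror the tests above. Integer and Double compare as equal because the check maps Integer to Double before comparing string representations, while a different column order (or any other logical-type mismatch) makes the schemas unequal.

import woodwork as ww
from woodwork.logical_types import Categorical, Double, Integer

from evalml.utils import _schema_is_equal

schema_int = ww.table_schema.TableSchema(
    column_names=["a", "b"],
    logical_types={"a": Categorical(), "b": Integer()},
    semantic_tags={"a": [], "b": ["numeric"]},
)
schema_dbl = ww.table_schema.TableSchema(
    column_names=["a", "b"],
    logical_types={"a": Categorical(), "b": Double()},
    semantic_tags={"a": [], "b": ["numeric"]},
)
schema_reordered = ww.table_schema.TableSchema(
    column_names=["b", "a"],  # same columns, different order
    logical_types={"a": Categorical(), "b": Integer()},
    semantic_tags={"a": [], "b": ["numeric"]},
)

assert _schema_is_equal(schema_int, schema_dbl)  # Integer vs Double: treated as equal
assert not _schema_is_equal(schema_int, schema_reordered)  # column order matters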
