From 40f72358a446527ae1073ab6631a4c78349b9439 Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Thu, 7 Jan 2021 16:45:21 -0500
Subject: [PATCH 1/7] objectives handling lists

---
 docs/source/release_notes.rst                 |  1 +
 evalml/objectives/objective_base.py           |  6 ++-
 evalml/pipelines/components/component_base.py |  4 +-
 .../classifiers/lightgbm_classifier.py        |  3 +-
 .../components/transformers/transformer.py    |  8 +++
 evalml/tests/automl_tests/test_automl.py      |  5 +-
 .../tests/component_tests/test_components.py  | 54 +++++++++++++++++++
 .../objective_tests/test_fraud_detection.py   | 20 ++++++-
 evalml/tests/pipeline_tests/test_pipelines.py |  6 ++-
 evalml/utils/gen_utils.py                     |  2 +-
 10 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 72942a742e..2bc5c994f8 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -9,6 +9,7 @@ Release Notes
         * Added multiclass check to ``InvalidTargetDataCheck`` for two examples per class :pr:`1596`
         * Support graphviz 0.16 :pr:`1657`
         * Enhanced time series pipelines to accept empty features :pr:`1651`
+        * Added support for list inputs for objectives :pr:``
     * Fixes
         * Fixed thresholding for pipelines in ``AutoMLSearch`` to only threshold binary classification pipelines :pr:`1622` :pr:`1626`
         * Updated ``load_data`` to return Woodwork structures and update default parameter value for ``index`` to ``None`` :pr:`1610`
diff --git a/evalml/objectives/objective_base.py b/evalml/objectives/objective_base.py
index 1c30299d73..40d06c2e7e 100644
--- a/evalml/objectives/objective_base.py
+++ b/evalml/objectives/objective_base.py
@@ -76,11 +76,15 @@ def _standardize_input_type(input_data):
         """Standardize input to pandas for scoring.

         Arguments:
-            input_data (ww.DataTable, ww.DataColumn, pd.DataFrame, pd.Series, or np.ndarray): A matrix of predictions or predicted probabilities
+            input_data (list, ww.DataTable, ww.DataColumn, pd.DataFrame, pd.Series, or np.ndarray): A matrix of predictions or predicted probabilities

         Returns:
             pd.DataFrame or pd.Series: a pd.Series, or pd.DataFrame object if predicted probabilities were provided.
""" + if isinstance(input_data, list): + if isinstance(input_data[0], list): + return pd.DataFrame(input_data) + return pd.Series(input_data) if isinstance(input_data, (pd.Series, pd.DataFrame)): return input_data if isinstance(input_data, ww.DataTable): diff --git a/evalml/pipelines/components/component_base.py b/evalml/pipelines/components/component_base.py index df9bd48c91..d81ba7c6c7 100644 --- a/evalml/pipelines/components/component_base.py +++ b/evalml/pipelines/components/component_base.py @@ -86,8 +86,8 @@ def fit(self, X, y=None): """Fits component to data Arguments: - X (ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features] - y (ww.DataColumn, pd.Series, np.ndarray, optional): The target training data of length [n_samples] + X (list, ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features] + y (list, ww.DataColumn, pd.Series, np.ndarray, optional): The target training data of length [n_samples] Returns: self diff --git a/evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py b/evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py index 3e62898ac1..9866da27d3 100644 --- a/evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py +++ b/evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py @@ -91,7 +91,8 @@ def _encode_categories(self, X, fit=False): return X_encoded def _encode_labels(self, y): - y_encoded = pd.Series(y) + y_encoded = _convert_to_woodwork_structure(y) + y_encoded = _convert_woodwork_types_wrapper(y_encoded.to_series()) # change only if dtype isn't int if not is_integer_dtype(y_encoded): self._label_encoder = LabelEncoder() diff --git a/evalml/pipelines/components/transformers/transformer.py b/evalml/pipelines/components/transformers/transformer.py index 51d2c7a4dd..688a85e450 100644 --- a/evalml/pipelines/components/transformers/transformer.py +++ b/evalml/pipelines/components/transformers/transformer.py @@ -3,6 +3,10 @@ from evalml.exceptions import MethodPropertyNotFoundError from evalml.model_family import ModelFamily from evalml.pipelines.components import ComponentBase +from evalml.utils.gen_utils import ( + _convert_to_woodwork_structure, + _convert_woodwork_types_wrapper +) class Transformer(ComponentBase): @@ -47,6 +51,10 @@ def fit_transform(self, X, y=None): pd.DataFrame: Transformed X """ try: + X = _convert_to_woodwork_structure(X) + y = _convert_to_woodwork_structure(y) + X = _convert_woodwork_types_wrapper(X.to_dataframe()) + y = _convert_woodwork_types_wrapper(y.to_series()) X_t = self._component_obj.fit_transform(X, y) except AttributeError: try: diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index f401c1d16b..54b3294db4 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -1045,7 +1045,7 @@ def test_results_getter(mock_fit, mock_score, X_y_binary): assert automl.results['pipeline_results'][0]['score'] == 1.0 -@pytest.mark.parametrize("data_type", ['np', 'pd', 'ww']) +@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww']) @pytest.mark.parametrize("automl_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]) @pytest.mark.parametrize("target_type", ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool', 'category', 'object', 'Int64', 'boolean']) def test_targets_pandas_data_types_classification(data_type, automl_type, target_type): @@ -1071,6 +1071,9 @@ def 
         y = y.map({unique_vals[i]: float(i) for i in range(len(unique_vals))})
     y = y.astype(target_type)

+    if data_type == 'li':
+        X = X.to_numpy().tolist()
+        y = y.to_numpy().tolist()
     if data_type == 'np':
         X = X.to_numpy()
diff --git a/evalml/tests/component_tests/test_components.py b/evalml/tests/component_tests/test_components.py
index b87abfc27a..1ed3131866 100644
--- a/evalml/tests/component_tests/test_components.py
+++ b/evalml/tests/component_tests/test_components.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import woodwork as ww
 from skopt.space import Categorical

 from evalml.exceptions import (
@@ -825,6 +826,59 @@ def test_all_estimators_check_fit(X_y_binary, test_estimator_needs_fitting_false
         component.feature_importance


+@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
+def test_all_transformers_check_fit_input_type(data_type, X_y_binary):
+    X, y = X_y_binary
+    X = pd.DataFrame(X)
+    y = pd.Series(y)
+
+    if data_type == "li":
+        X = X.to_numpy().tolist()
+        y = y.to_numpy().tolist()
+
+    elif data_type == "pd":
+        X = X.to_numpy()
+        y = y.to_numpy()
+
+    elif data_type == "ww":
+        X = ww.DataTable(X)
+        y = ww.DataColumn(y)
+
+    for component_class in _all_transformers():
+        if not component_class.needs_fitting:
+            continue
+
+        component = component_class()
+        component.fit(X, y)
+
+
+@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
+def test_all_estimators_check_fit_input_type(data_type, X_y_binary, test_estimator_needs_fitting_false, helper_functions):
+    X, y = X_y_binary
+    X = pd.DataFrame(X)
+    y = pd.Series(y)
+
+    if data_type == "li":
+        X = X.to_numpy().tolist()
+        y = y.to_numpy().tolist()
+
+    elif data_type == "np":
+        X = X.to_numpy()
+        y = y.to_numpy()
+
+    elif data_type == "ww":
+        X = ww.DataTable(X)
+        y = ww.DataColumn(y)
+
+    estimators_to_check = [estimator for estimator in _all_estimators() if estimator not in [StackedEnsembleClassifier, StackedEnsembleRegressor, TimeSeriesBaselineRegressor]] + [test_estimator_needs_fitting_false]
+    for component_class in estimators_to_check:
+        if not component_class.needs_fitting:
+            continue
+
+        component = helper_functions.safe_init_component_with_njobs_1(component_class)
+        component.fit(X, y)
+
+
 def test_no_fitting_required_components(X_y_binary, test_estimator_needs_fitting_false, helper_functions):
     X, y = X_y_binary
     for component_class in all_components() + [test_estimator_needs_fitting_false]:
diff --git a/evalml/tests/objective_tests/test_fraud_detection.py b/evalml/tests/objective_tests/test_fraud_detection.py
index b7b8267e3b..f659c0d40a 100644
--- a/evalml/tests/objective_tests/test_fraud_detection.py
+++ b/evalml/tests/objective_tests/test_fraud_detection.py
@@ -33,10 +33,13 @@ def test_fraud_objective_function_amount_col(X_y_binary):
                           fraud_payout_percentage=.75,
                           amount_col="this column does not exist")
     y_predicted = pd.Series([.1, .5, .5])
-    y_true = pd.Series([True, False, True])
+    y_true = [True, False, True]
     with pytest.raises(ValueError, match="`this column does not exist` is not a valid column in X."):
         objective.objective_function(y_true, y_predicted, X)

+    with pytest.raises(ValueError, match="`this column does not exist` is not a valid column in X."):
+        objective.objective_function(y_true, y_predicted, X.tolist())
+

 def test_input_contains_nan(X_y_binary):
     fraud_cost = FraudCost(amount_col="value")
@@ -139,3 +142,18 @@ def test_fraud_objective_score(X_y_binary):
     pd.testing.assert_series_equal(out, expected_y_pred, check_names=False)
     score = fraud_cost.score(y_true, out, extra_columns)
     assert (score == 0.255)
+
+
+def test_fraud_objective_score_list(X_y_binary):
+    X, y = X_y_binary
+    fraud_cost = FraudCost(amount_col="value")
+
+    y_predicted = [.1, .5, .5]
+    y_true = [True, False, True]
+    extra_columns = pd.DataFrame({"value": [100, 5, 250]})
+
+    out = fraud_cost.decision_function(y_predicted, 5, extra_columns)
+    assert isinstance(out, pd.Series)
+    pd.testing.assert_series_equal(out, pd.Series(y_true), check_names=False)
+    score = fraud_cost.score(y_true, out, extra_columns)
+    assert (score == 0.0)
diff --git a/evalml/tests/pipeline_tests/test_pipelines.py b/evalml/tests/pipeline_tests/test_pipelines.py
index 0f72dbed93..a383768e9c 100644
--- a/evalml/tests/pipeline_tests/test_pipelines.py
+++ b/evalml/tests/pipeline_tests/test_pipelines.py
@@ -1597,7 +1597,7 @@ def test_get_default_parameters(logistic_regression_binary_pipeline_class):
     assert logistic_regression_binary_pipeline_class.default_parameters == expected_defaults


-@pytest.mark.parametrize("data_type", ['np', 'pd', 'ww'])
+@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
 @pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS])
 @pytest.mark.parametrize("target_type", ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool', 'category', 'object', 'Int64', 'boolean'])
 def test_targets_data_types_classification_pipelines(data_type, problem_type, target_type, all_binary_pipeline_classes,
@@ -1633,6 +1633,10 @@ def test_targets_data_types_classification_pipelines(data_type, problem_type, ta
     y = y.astype(target_type)
     unique_vals = y.unique()

+    if data_type == 'li':
+        X = X.to_numpy().tolist()
+        y = y.to_numpy().tolist()
+
     if data_type == 'np':
         X = X.to_numpy()
         y = y.to_numpy()
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
index d0f2b814be..e5e6de9a0a 100644
--- a/evalml/utils/gen_utils.py
+++ b/evalml/utils/gen_utils.py
@@ -219,7 +219,7 @@ def _rename_column_names_to_numeric(X):
         Transformed X where column names are renamed to numerical values
     """
     X_t = X
-    if isinstance(X, np.ndarray):
+    if isinstance(X, (np.ndarray, list)):
         return pd.DataFrame(X)
     if isinstance(X, ww.DataTable):
         X_t = X.to_dataframe()
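The core of the patch above is the list branch added to `_standardize_input_type`: a flat list becomes a `pd.Series`, while a nested list (one inner list of predicted probabilities per row) becomes a `pd.DataFrame`. A minimal standalone sketch of just that branch, with a made-up helper name for illustration (the real method also handles pandas, Woodwork, and numpy inputs):

    import pandas as pd

    def _standardize_list_input(input_data):
        # Mirrors the list branch added above: a flat list of predictions
        # becomes a pd.Series; a nested list of predicted probabilities
        # becomes a pd.DataFrame.
        if isinstance(input_data[0], list):
            return pd.DataFrame(input_data)
        return pd.Series(input_data)

    assert _standardize_list_input([.1, .5, .5]).equals(pd.Series([.1, .5, .5]))
    assert _standardize_list_input([[.9, .1], [.5, .5]]).shape == (2, 2)

Note the branch only inspects `input_data[0]`, so it assumes a non-empty, uniformly nested list; PATCH 5 below reorders these checks and adds an explicit `np.ndarray` case.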
From 2ee661a9e783b2dbdbd1fb14f50cf83e11864291 Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Thu, 7 Jan 2021 16:46:55 -0500
Subject: [PATCH 2/7] release notes

---
 docs/source/release_notes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 2bc5c994f8..2240db0555 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -9,7 +9,7 @@ Release Notes
         * Added multiclass check to ``InvalidTargetDataCheck`` for two examples per class :pr:`1596`
         * Support graphviz 0.16 :pr:`1657`
         * Enhanced time series pipelines to accept empty features :pr:`1651`
-        * Added support for list inputs for objectives :pr:``
+        * Added support for list inputs for objectives :pr:`1663`
     * Fixes
         * Fixed thresholding for pipelines in ``AutoMLSearch`` to only threshold binary classification pipelines :pr:`1622` :pr:`1626`
         * Updated ``load_data`` to return Woodwork structures and update default parameter value for ``index`` to ``None`` :pr:`1610`

From d88eb59ccf6eb4c0024d4e14f70e63b0057166ea Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Fri, 8 Jan 2021 12:11:39 -0500
Subject: [PATCH 3/7] fix transforming

---
 .../pipelines/components/transformers/transformer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/evalml/pipelines/components/transformers/transformer.py b/evalml/pipelines/components/transformers/transformer.py
index 688a85e450..cbef7ede28 100644
--- a/evalml/pipelines/components/transformers/transformer.py
+++ b/evalml/pipelines/components/transformers/transformer.py
@@ -51,11 +51,11 @@ def fit_transform(self, X, y=None):
             pd.DataFrame: Transformed X
         """
         try:
-            X = _convert_to_woodwork_structure(X)
-            y = _convert_to_woodwork_structure(y)
-            X = _convert_woodwork_types_wrapper(X.to_dataframe())
-            y = _convert_woodwork_types_wrapper(y.to_series())
-            X_t = self._component_obj.fit_transform(X, y)
+            X2 = _convert_to_woodwork_structure(X)
+            y2 = _convert_to_woodwork_structure(y)
+            X2 = _convert_woodwork_types_wrapper(X2.to_dataframe())
+            y2 = _convert_woodwork_types_wrapper(y2.to_series())
+            X_t = self._component_obj.fit_transform(X2, y2)
         except AttributeError:
             try:
                 self.fit(X, y)

From 2e0bfe13259cf0fefc1e3b4f32412d143d412e6a Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Fri, 8 Jan 2021 14:48:05 -0500
Subject: [PATCH 4/7] clean up tests

---
 evalml/tests/automl_tests/test_automl.py      | 16 ++------
 .../tests/component_tests/test_components.py  | 39 +++----------------
 evalml/tests/conftest.py                      |  5 +++
 evalml/tests/pipeline_tests/test_pipelines.py | 15 ++-----
 4 files changed, 18 insertions(+), 57 deletions(-)

diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py
index 4f4ac82f0d..d4ccf5e6c3 100644
--- a/evalml/tests/automl_tests/test_automl.py
+++ b/evalml/tests/automl_tests/test_automl.py
@@ -1053,7 +1053,7 @@ def test_results_getter(mock_fit, mock_score, X_y_binary):
 @pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
 @pytest.mark.parametrize("automl_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS])
 @pytest.mark.parametrize("target_type", ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool', 'category', 'object', 'Int64', 'boolean'])
-def test_targets_pandas_data_types_classification(data_type, automl_type, target_type):
+def test_targets_pandas_data_types_classification(data_type, automl_type, target_type, make_data_type):
     if data_type == 'np' and target_type in ['Int64', 'boolean']:
         pytest.skip("Skipping test where data type is numpy and target type is nullable dtype")
@@ -1076,17 +1076,9 @@ def test_targets_pandas_data_types_classification(data_type, automl_type, target
         y = y.map({unique_vals[i]: float(i) for i in range(len(unique_vals))})
     y = y.astype(target_type)

-    if data_type == 'li':
-        X = X.to_numpy().tolist()
-        y = y.to_numpy().tolist()
-
-    if data_type == 'np':
-        X = X.to_numpy()
-        y = y.to_numpy()
-
-    elif data_type == 'ww':
-        X = ww.DataTable(X)
-        y = ww.DataColumn(y)
+    if data_type != 'pd':
+        X = make_data_type(data_type, X)
+        y = make_data_type(data_type, y)

     automl = AutoMLSearch(X_train=X, y_train=y, problem_type=automl_type, max_iterations=3, n_jobs=1)
     automl.search()
diff --git a/evalml/tests/component_tests/test_components.py b/evalml/tests/component_tests/test_components.py
index 1ed3131866..538d78dfcb 100644
--- a/evalml/tests/component_tests/test_components.py
+++ b/evalml/tests/component_tests/test_components.py
@@ -8,7 +8,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-import woodwork as ww
 from skopt.space import Categorical

 from evalml.exceptions import (
@@ -827,23 +826,10 @@ def test_all_estimators_check_fit(X_y_binary, test_estimator_needs_fitting_false
         component.feature_importance


 @pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
-def test_all_transformers_check_fit_input_type(data_type, X_y_binary):
+def test_all_transformers_check_fit_input_type(data_type, X_y_binary, make_data_type):
     X, y = X_y_binary
-    X = pd.DataFrame(X)
-    y = pd.Series(y)
-
-    if data_type == "li":
-        X = X.to_numpy().tolist()
-        y = y.to_numpy().tolist()
-
-    elif data_type == "pd":
-        X = X.to_numpy()
-        y = y.to_numpy()
-
-    elif data_type == "ww":
-        X = ww.DataTable(X)
-        y = ww.DataColumn(y)
-
+    X = make_data_type(data_type, X)
+    y = make_data_type(data_type, y)
     for component_class in _all_transformers():
         if not component_class.needs_fitting:
             continue
@@ -853,23 +839,10 @@ def test_all_transformers_check_fit_input_type(data_type, X_y_binary):
 @pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
-def test_all_estimators_check_fit_input_type(data_type, X_y_binary, test_estimator_needs_fitting_false, helper_functions):
+def test_all_estimators_check_fit_input_type(data_type, X_y_binary, make_data_type, test_estimator_needs_fitting_false, helper_functions):
     X, y = X_y_binary
-    X = pd.DataFrame(X)
-    y = pd.Series(y)
-
-    if data_type == "li":
-        X = X.to_numpy().tolist()
-        y = y.to_numpy().tolist()
-
-    elif data_type == "np":
-        X = X.to_numpy()
-        y = y.to_numpy()
-
-    elif data_type == "ww":
-        X = ww.DataTable(X)
-        y = ww.DataColumn(y)
-
+    X = make_data_type(data_type, X)
+    y = make_data_type(data_type, y)
     estimators_to_check = [estimator for estimator in _all_estimators() if estimator not in [StackedEnsembleClassifier, StackedEnsembleRegressor, TimeSeriesBaselineRegressor]] + [test_estimator_needs_fitting_false]
     for component_class in estimators_to_check:
         if not component_class.needs_fitting:
             continue
diff --git a/evalml/tests/conftest.py b/evalml/tests/conftest.py
index 680803f231..424b2723a8 100644
--- a/evalml/tests/conftest.py
+++ b/evalml/tests/conftest.py
@@ -516,6 +516,11 @@ def safe_init_pipeline_with_njobs_1(pipeline_class):
 def make_data_type():
     """Helper function to convert numpy or pandas input to the appropriate type for tests."""
     def _make_data_type(data_type, data):
+        if data_type == "li":
+            if isinstance(data, pd.DataFrame):
+                data = data.to_numpy()
+            data = data.tolist()
+            return data
         if data_type != "np":
             if len(data.shape) == 1:
                 data = pd.Series(data)
diff --git a/evalml/tests/pipeline_tests/test_pipelines.py b/evalml/tests/pipeline_tests/test_pipelines.py
index a383768e9c..795a3a9694 100644
--- a/evalml/tests/pipeline_tests/test_pipelines.py
+++ b/evalml/tests/pipeline_tests/test_pipelines.py
@@ -1601,7 +1601,7 @@ def test_get_default_parameters(logistic_regression_binary_pipeline_class):
 @pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS])
 @pytest.mark.parametrize("target_type", ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', 'bool', 'category', 'object', 'Int64', 'boolean'])
 def test_targets_data_types_classification_pipelines(data_type, problem_type, target_type, all_binary_pipeline_classes,
-                                                     all_multiclass_pipeline_classes, helper_functions):
+                                                     make_data_type, all_multiclass_pipeline_classes, helper_functions):
     if data_type == 'np' and target_type in ['Int64', 'boolean']:
         pytest.skip("Skipping test where data type is numpy and target type is nullable dtype")
@@ -1633,17 +1633,8 @@ def test_targets_data_types_classification_pipelines(data_type, problem_type, ta
     y = y.astype(target_type)
     unique_vals = y.unique()

-    if data_type == 'li':
-        X = X.to_numpy().tolist()
-        y = y.to_numpy().tolist()
-
-    if data_type == 'np':
-        X = X.to_numpy()
-        y = y.to_numpy()
-
-    elif data_type == 'ww':
-        X = ww.DataTable(X)
-        y = ww.DataColumn(y)
+    X = make_data_type(data_type, X)
+    y = make_data_type(data_type, y)

     for pipeline_class in pipeline_classes:
         pipeline = helper_functions.safe_init_pipeline_with_njobs_1(pipeline_class)

From 0033795dc7f39eece56535d86323d48741ab697e Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Fri, 8 Jan 2021 15:37:34 -0500
Subject: [PATCH 5/7] clean up tests

---
 evalml/objectives/objective_base.py           | 13 +++----
 .../tests/component_tests/test_components.py  | 14 --------
 .../tests/component_tests/test_estimators.py  | 34 ++++++++++++++++++-
 3 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/evalml/objectives/objective_base.py b/evalml/objectives/objective_base.py
index 40d06c2e7e..4d004676b7 100644
--- a/evalml/objectives/objective_base.py
+++ b/evalml/objectives/objective_base.py
@@ -81,19 +81,20 @@ def _standardize_input_type(input_data):
         Returns:
             pd.DataFrame or pd.Series: a pd.Series, or pd.DataFrame object if predicted probabilities were provided.
         """
-        if isinstance(input_data, list):
-            if isinstance(input_data[0], list):
-                return pd.DataFrame(input_data)
-            return pd.Series(input_data)
         if isinstance(input_data, (pd.Series, pd.DataFrame)):
             return input_data
         if isinstance(input_data, ww.DataTable):
             return _convert_woodwork_types_wrapper(input_data.to_dataframe())
         if isinstance(input_data, ww.DataColumn):
             return _convert_woodwork_types_wrapper(input_data.to_series())
-        if len(input_data.shape) == 1:
+        if isinstance(input_data, list):
+            if isinstance(input_data[0], list):
+                return pd.DataFrame(input_data)
             return pd.Series(input_data)
-        return pd.DataFrame(input_data)
+        if isinstance(input_data, np.ndarray):
+            if len(input_data.shape) == 1:
+                return pd.Series(input_data)
+            return pd.DataFrame(input_data)

     def validate_inputs(self, y_true, y_predicted):
         """Validates the input based on a few simple checks.
diff --git a/evalml/tests/component_tests/test_components.py b/evalml/tests/component_tests/test_components.py
index 538d78dfcb..fe2bffc0c1 100644
--- a/evalml/tests/component_tests/test_components.py
+++ b/evalml/tests/component_tests/test_components.py
@@ -838,20 +838,6 @@ def test_all_transformers_check_fit_input_type(data_type, X_y_binary, make_data_
         component.fit(X, y)


-@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
-def test_all_estimators_check_fit_input_type(data_type, X_y_binary, make_data_type, test_estimator_needs_fitting_false, helper_functions):
-    X, y = X_y_binary
-    X = make_data_type(data_type, X)
-    y = make_data_type(data_type, y)
-    estimators_to_check = [estimator for estimator in _all_estimators() if estimator not in [StackedEnsembleClassifier, StackedEnsembleRegressor, TimeSeriesBaselineRegressor]] + [test_estimator_needs_fitting_false]
-    for component_class in estimators_to_check:
-        if not component_class.needs_fitting:
-            continue
-
-        component = helper_functions.safe_init_component_with_njobs_1(component_class)
-        component.fit(X, y)
-
-
 def test_no_fitting_required_components(X_y_binary, test_estimator_needs_fitting_false, helper_functions):
     X, y = X_y_binary
     for component_class in all_components() + [test_estimator_needs_fitting_false]:
diff --git a/evalml/tests/component_tests/test_estimators.py b/evalml/tests/component_tests/test_estimators.py
index 2f26be8e5f..c73af0f9f2 100644
--- a/evalml/tests/component_tests/test_estimators.py
+++ b/evalml/tests/component_tests/test_estimators.py
@@ -2,10 +2,14 @@

 import numpy as np
 import pandas as pd
+import pytest

 from evalml.model_family import ModelFamily
 from evalml.pipelines.components import Estimator
-from evalml.pipelines.components.utils import _all_estimators_used_in_search
+from evalml.pipelines.components.utils import (
+    _all_estimators_used_in_search,
+    get_estimators
+)
 from evalml.problem_types import ProblemTypes, handle_problem_types
@@ -56,3 +60,31 @@ class MockEstimator(Estimator):
     mock_estimator.supported_problem_types = ['binary', 'multiclass']
     assert mock_estimator != MockEstimator()
     assert 'Mock Estimator' != mock_estimator
+
+
+@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
+def test_all_estimators_check_fit_input_type(data_type, X_y_binary, make_data_type, helper_functions):
+    X, y = X_y_binary
+    X = make_data_type(data_type, X)
+    y = make_data_type(data_type, y)
+    estimators_to_check = [estimator for estimator in get_estimators('binary')]
+    for component_class in estimators_to_check:
+        if not component_class.needs_fitting:
+            continue
+
+        component = helper_functions.safe_init_component_with_njobs_1(component_class)
+        component.fit(X, y)
+
+
+@pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
+def test_all_estimators_check_fit_input_type_regression(data_type, X_y_regression, make_data_type, helper_functions):
+    X, y = X_y_regression
+    X = make_data_type(data_type, X)
+    y = make_data_type(data_type, y)
+    estimators_to_check = [estimator for estimator in get_estimators('regression')]
+    for component_class in estimators_to_check:
+        if not component_class.needs_fitting:
+            continue
+
+        component = helper_functions.safe_init_component_with_njobs_1(component_class)
+        component.fit(X, y)

From 557ec549eb1524a3098079a65ff58e43ce96d0b8 Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Fri, 8 Jan 2021 16:08:22 -0500
Subject: [PATCH 6/7] fix test

---
 evalml/tests/component_tests/test_estimators.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/evalml/tests/component_tests/test_estimators.py b/evalml/tests/component_tests/test_estimators.py
index c73af0f9f2..64aa59320c 100644
--- a/evalml/tests/component_tests/test_estimators.py
+++ b/evalml/tests/component_tests/test_estimators.py
@@ -69,9 +69,6 @@ def test_all_estimators_check_fit_input_type(data_type, X_y_binary, make_data_ty
     y = make_data_type(data_type, y)
     estimators_to_check = [estimator for estimator in get_estimators('binary')]
     for component_class in estimators_to_check:
-        if not component_class.needs_fitting:
-            continue
-
         component = helper_functions.safe_init_component_with_njobs_1(component_class)
         component.fit(X, y)
@@ -83,8 +80,5 @@ def test_all_estimators_check_fit_input_type_regression(data_type, X_y_regressio
     y = make_data_type(data_type, y)
     estimators_to_check = [estimator for estimator in get_estimators('regression')]
     for component_class in estimators_to_check:
-        if not component_class.needs_fitting:
-            continue
-
         component = helper_functions.safe_init_component_with_njobs_1(component_class)
         component.fit(X, y)

From f4907d38f6bb3b11d53a598b2d78b9a2d61b9f13 Mon Sep 17 00:00:00 2001
From: bchen1116
Date: Mon, 11 Jan 2021 10:32:45 -0500
Subject: [PATCH 7/7] predict and predict_proba cov

---
 evalml/tests/component_tests/test_estimators.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/evalml/tests/component_tests/test_estimators.py b/evalml/tests/component_tests/test_estimators.py
index 64aa59320c..9af9ea2ca9 100644
--- a/evalml/tests/component_tests/test_estimators.py
+++ b/evalml/tests/component_tests/test_estimators.py
@@ -71,6 +71,8 @@ def test_all_estimators_check_fit_input_type(data_type, X_y_binary, make_data_ty
     for component_class in estimators_to_check:
         component = helper_functions.safe_init_component_with_njobs_1(component_class)
         component.fit(X, y)
+        component.predict(X)
+        component.predict_proba(X)


 @pytest.mark.parametrize("data_type", ['li', 'np', 'pd', 'ww'])
@@ -82,3 +84,4 @@ def test_all_estimators_check_fit_input_type_regression(data_type, X_y_regressio
     for component_class in estimators_to_check:
         component = helper_functions.safe_init_component_with_njobs_1(component_class)
         component.fit(X, y)
+        component.predict(X)
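Taken together, the series lets an objective be scored directly from plain Python lists. A usage sketch mirroring `test_fraud_objective_score_list` from PATCH 1 (illustrative only, assuming evalml at this revision):

    import pandas as pd
    from evalml.objectives import FraudCost

    fraud_cost = FraudCost(amount_col="value")
    y_predicted = [.1, .5, .5]    # flat list; converted to a pd.Series internally
    y_true = [True, False, True]
    extra_columns = pd.DataFrame({"value": [100, 5, 250]})

    # Lists are now accepted anywhere a pandas/Woodwork structure was required.
    out = fraud_cost.decision_function(y_predicted, 5, extra_columns)
    assert isinstance(out, pd.Series)
    score = fraud_cost.score(y_true, out, extra_columns)

Per the test above, `out` matches `pd.Series(y_true)` and `score` comes out to 0.0.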