diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 14d30a0f37..45569b51df 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -11,6 +11,7 @@ Release Notes * Fixes * Updated TextFeaturizer component to no longer require an internet connection to run :pr:`1022` * Fixed non-deterministic element of TextFeaturizer transformations :pr:`1022` + * Added a StandardScaler to all ElasticNet pipelines :pr:`1065` * Changes * Added `needs_fitting` property to ComponentBase :pr:`1044` * Updated references to data types to use datatype lists defined in `evalml.utils.gen_utils` :pr:`1039` diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py index d3522dc104..f73ada3fca 100644 --- a/evalml/pipelines/utils.py +++ b/evalml/pipelines/utils.py @@ -6,14 +6,13 @@ ) from .regression_pipeline import RegressionPipeline +from evalml.model_family import ModelFamily from evalml.pipelines.components import ( CatBoostClassifier, CatBoostRegressor, DateTimeFeaturizer, DropNullColumns, Imputer, - LinearRegressor, - LogisticRegressionClassifier, OneHotEncoder, StandardScaler ) @@ -56,7 +55,7 @@ def _get_preprocessing_components(X, y, problem_type, estimator_class): if (add_datetime_featurizer or len(categorical_cols.columns) > 0) and estimator_class not in {CatBoostClassifier, CatBoostRegressor}: pp_components.append(OneHotEncoder) - if estimator_class in {LinearRegressor, LogisticRegressionClassifier}: + if estimator_class.model_family == ModelFamily.LINEAR_MODEL: pp_components.append(StandardScaler) return pp_components diff --git a/evalml/tests/pipeline_tests/test_pipelines.py b/evalml/tests/pipeline_tests/test_pipelines.py index 316c42a4f0..c86436ba8d 100644 --- a/evalml/tests/pipeline_tests/test_pipelines.py +++ b/evalml/tests/pipeline_tests/test_pipelines.py @@ -25,6 +25,8 @@ from evalml.pipelines.components import ( DateTimeFeaturizer, DropNullColumns, + ElasticNetClassifier, + ElasticNetRegressor, Imputer, LinearRegressor, LogisticRegressionClassifier, @@ -114,6 +116,11 @@ def test_make_pipeline(): assert isinstance(binary_pipeline, type(BinaryClassificationPipeline)) assert binary_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, StandardScaler, LogisticRegressionClassifier] + en_binary_pipeline = make_pipeline(X, y, ElasticNetClassifier, ProblemTypes.BINARY) + assert isinstance(en_binary_pipeline, type(BinaryClassificationPipeline)) + assert en_binary_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, + StandardScaler, ElasticNetClassifier] + binary_pipeline = make_pipeline(X, y, RandomForestClassifier, ProblemTypes.BINARY) assert isinstance(binary_pipeline, type(BinaryClassificationPipeline)) assert binary_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, RandomForestClassifier] @@ -123,6 +130,11 @@ def test_make_pipeline(): assert isinstance(multiclass_pipeline, type(MulticlassClassificationPipeline)) assert multiclass_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, StandardScaler, LogisticRegressionClassifier] + en_multiclass_pipeline = make_pipeline(X, y, ElasticNetClassifier, ProblemTypes.MULTICLASS) + assert isinstance(en_multiclass_pipeline, type(MulticlassClassificationPipeline)) + assert en_multiclass_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, + StandardScaler, ElasticNetClassifier] + regression_pipeline = make_pipeline(X, y, RandomForestRegressor, ProblemTypes.REGRESSION) assert isinstance(regression_pipeline, type(RegressionPipeline)) assert regression_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, RandomForestRegressor] @@ -131,6 +143,11 @@ def test_make_pipeline(): assert isinstance(regression_pipeline, type(RegressionPipeline)) assert regression_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, StandardScaler, LinearRegressor] + en_multiclass_pipeline = make_pipeline(X, y, ElasticNetRegressor, ProblemTypes.REGRESSION) + assert isinstance(en_multiclass_pipeline, type(RegressionPipeline)) + assert en_multiclass_pipeline.component_graph == [DropNullColumns, Imputer, DateTimeFeaturizer, OneHotEncoder, + StandardScaler, ElasticNetRegressor] + def test_make_pipeline_no_nulls(): X = pd.DataFrame({"numerical": [1, 2, 3, 1, 2],