Skip to content

Commit

Permalink
Delete baseline pipeline classes (#2202)
Browse files (browse the repository at this point in the history)
* init

* fixed baseline for binary

* convert over multiclass

* fix tests

* update regression pipelines

* comment out time series

* oops fix import

* fix more imports

* delete and cleanup

* add test and fix input feature names
  • Loading branch information
angela97lin committed May 2, 2021
1 parent 9d108d7 commit 2f7f653
Show file tree
Hide file tree
Showing 19 changed files with 148 additions and 636 deletions.
24 changes: 0 additions & 24 deletions docs/source/api_reference.rst
Expand Up @@ -151,30 +151,6 @@ Pipeline Base Classes
TimeSeriesMulticlassClassificationPipeline
TimeSeriesRegressionPipeline

Classification Pipelines
~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: generated
:template: pipeline_class.rst
:nosignatures:

BaselineBinaryPipeline
BaselineMulticlassPipeline
ModeBaselineBinaryPipeline
ModeBaselineMulticlassPipeline

Regression Pipelines
~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated
:template: pipeline_class.rst
:nosignatures:

BaselineRegressionPipeline
MeanBaselineRegressionPipeline
TimeSeriesBaselineRegressionPipeline


.. currentmodule:: evalml.pipelines.utils

Expand Down
2 changes: 2 additions & 0 deletions docs/source/release_notes.rst
Expand Up @@ -10,6 +10,7 @@ Release Notes
* Fixes
* Fixed partial dependence not respecting grid resolution parameter for numerical features :pr:`2180`
* Changes
* Deleted baseline pipeline classes :pr:`2202`
* Reverted user-specified date feature PR :pr:`2155` until a fix for the ``pmdarima`` installation is found :pr:`2214`
* Updated pipeline API to accept component graph and other class attributes as instance parameters. Old pipeline API still works but will not be supported long-term. :pr:`2091`
* Documentation Changes
Expand All @@ -24,6 +25,7 @@ Release Notes
.. warning::

**Breaking Changes**
* All baseline pipeline classes (``BaselineBinaryPipeline``, ``BaselineMulticlassPipeline``, ``BaselineRegressionPipeline``, etc.) have been deleted :pr:`2202`
* Updated pipeline API to accept component graph and other class attributes as instance parameters. Old pipeline API still works but will not be supported long-term. Pipelines can now be initialized by specifying the component graph as the first parameter, and then passing in optional arguments such as ``custom_name``, ``parameters``, etc. For example, ``BinaryClassificationPipeline(["Random Forest Classifier"], parameters={})``. :pr:`2091`

**v0.23.0 Apr. 20, 2021**
Expand Down
49 changes: 31 additions & 18 deletions evalml/automl/automl_search.py
Expand Up @@ -34,13 +34,13 @@
get_objective
)
from evalml.pipelines import (
MeanBaselineRegressionPipeline,
ModeBaselineBinaryPipeline,
ModeBaselineMulticlassPipeline,
BinaryClassificationPipeline,
MulticlassClassificationPipeline,
PipelineBase,
TimeSeriesBaselineBinaryPipeline,
TimeSeriesBaselineMulticlassPipeline,
TimeSeriesBaselineRegressionPipeline
RegressionPipeline,
TimeSeriesBinaryClassificationPipeline,
TimeSeriesMulticlassClassificationPipeline,
TimeSeriesRegressionPipeline
)
from evalml.pipelines.components.utils import get_estimators
from evalml.pipelines.utils import make_pipeline
Expand Down Expand Up @@ -648,25 +648,38 @@ def _validate_problem_type(self):
if pipeline.problem_type != self.problem_type:
raise ValueError("Given pipeline {} is not compatible with problem_type {}.".format(pipeline.name, self.problem_type.value))

def _add_baseline_pipelines(self):
"""Fits a baseline pipeline to the data.
This is the first pipeline fit during search.
"""
def _get_baseline_pipeline(self):
"""Creates a baseline pipeline instance."""
if self.problem_type == ProblemTypes.BINARY:
baseline = ModeBaselineBinaryPipeline(parameters={})
baseline = BinaryClassificationPipeline(component_graph=["Baseline Classifier"],
custom_name="Mode Baseline Binary Classification Pipeline",
custom_hyperparameters={"strategy": ["mode"]})
elif self.problem_type == ProblemTypes.MULTICLASS:
baseline = ModeBaselineMulticlassPipeline(parameters={})
baseline = MulticlassClassificationPipeline(component_graph=["Baseline Classifier"],
custom_name="Mode Baseline Multiclass Classification Pipeline",
custom_hyperparameters={"strategy": ["mode"]})
elif self.problem_type == ProblemTypes.REGRESSION:
baseline = MeanBaselineRegressionPipeline(parameters={})
baseline = RegressionPipeline(component_graph=["Baseline Regressor"],
custom_name="Mean Baseline Regression Pipeline",
custom_hyperparameters={"strategy": ["mean"]})
else:
pipeline_class = {ProblemTypes.TIME_SERIES_REGRESSION: TimeSeriesBaselineRegressionPipeline,
ProblemTypes.TIME_SERIES_MULTICLASS: TimeSeriesBaselineMulticlassPipeline,
ProblemTypes.TIME_SERIES_BINARY: TimeSeriesBaselineBinaryPipeline}[self.problem_type]
pipeline_class, pipeline_name = {ProblemTypes.TIME_SERIES_REGRESSION: (TimeSeriesRegressionPipeline, "Time Series Baseline Regression Pipeline"),
ProblemTypes.TIME_SERIES_MULTICLASS: (TimeSeriesMulticlassClassificationPipeline, "Time Series Baseline Multiclass Pipeline"),
ProblemTypes.TIME_SERIES_BINARY: (TimeSeriesBinaryClassificationPipeline, "Time Series Baseline Binary Pipeline")}[self.problem_type]
gap = self.problem_configuration['gap']
max_delay = self.problem_configuration['max_delay']
baseline = pipeline_class(parameters={"pipeline": {"gap": gap, "max_delay": max_delay},
baseline = pipeline_class(component_graph=["Time Series Baseline Estimator"],
custom_name=pipeline_name,
parameters={"pipeline": {"gap": gap, "max_delay": max_delay},
"Time Series Baseline Estimator": {"gap": gap, "max_delay": max_delay}})
return baseline

def _add_baseline_pipelines(self):
"""Fits a baseline pipeline to the data.
This is the first pipeline fit during search.
"""
baseline = self._get_baseline_pipeline()
self._pre_evaluation_callback(baseline)
logger.info(f"Evaluating Baseline Pipeline: {baseline.name}")
computation = self._engine.submit_evaluation_job(self.automl_config, baseline, self.X_train, self.y_train)
Expand Down
11 changes: 0 additions & 11 deletions evalml/pipelines/__init__.py
Expand Up @@ -46,15 +46,4 @@
TimeSeriesBinaryClassificationPipeline,
TimeSeriesMulticlassClassificationPipeline
)
from .classification import (
BaselineBinaryPipeline,
BaselineMulticlassPipeline,
ModeBaselineBinaryPipeline,
ModeBaselineMulticlassPipeline
)
from .time_series_regression_pipeline import TimeSeriesRegressionPipeline
from .regression import (
BaselineRegressionPipeline,
MeanBaselineRegressionPipeline,
)
from .time_series_baselines import TimeSeriesBaselineRegressionPipeline, TimeSeriesBaselineBinaryPipeline, TimeSeriesBaselineMulticlassPipeline
2 changes: 0 additions & 2 deletions evalml/pipelines/classification/__init__.py

This file was deleted.

27 changes: 0 additions & 27 deletions evalml/pipelines/classification/baseline_binary.py

This file was deleted.

27 changes: 0 additions & 27 deletions evalml/pipelines/classification/baseline_multiclass.py

This file was deleted.

4 changes: 3 additions & 1 deletion evalml/pipelines/component_graph.py
Expand Up @@ -162,7 +162,9 @@ def _fit_transform_features_helper(self, needs_fitting, X, y=None):
ww.DataTable: Transformed values.
"""
if len(self.compute_order) <= 1:
return infer_feature_types(X)
X = infer_feature_types(X)
self.input_feature_names.update({self.compute_order[0]: list(X.columns)})
return X
component_outputs = self._compute_features(self.compute_order[:-1], X, y=y, fit=needs_fitting)
final_component_inputs = []
for parent in self.get_parents(self.compute_order[-1]):
Expand Down
1 change: 0 additions & 1 deletion evalml/pipelines/regression/__init__.py

This file was deleted.

27 changes: 0 additions & 27 deletions evalml/pipelines/regression/baseline_regression.py

This file was deleted.

62 changes: 0 additions & 62 deletions evalml/pipelines/time_series_baselines.py

This file was deleted.

0 comments on commit 2f7f653

Please sign in to comment.