Replace pipelines' 'supported_problem_types' with 'problem_type' in base classes #678

Merged: 10 commits, Apr 20, 2020
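In short, this PR drops the per-pipeline supported_problem_types list: each problem-type-specific base class (BinaryClassificationPipeline, MulticlassClassificationPipeline, RegressionPipeline) now carries a single problem_type class attribute, which concrete pipelines inherit. A minimal before/after sketch, illustrative rather than copied from the diff (the pipeline name is a placeholder):

    # Before this PR: every concrete pipeline listed its supported problem types.
    # class MyPipeline(PipelineBase):
    #     component_graph = ['Simple Imputer', 'Logistic Regression Classifier']
    #     supported_problem_types = ['binary', 'multiclass']

    # After this PR: subclass a problem-type-specific base, which sets problem_type.
    from evalml.pipelines import BinaryClassificationPipeline

    class MyBinaryPipeline(BinaryClassificationPipeline):
        component_graph = ['Simple Imputer', 'Logistic Regression Classifier']

    print(MyBinaryPipeline.problem_type)  # ProblemTypes.BINARY, inherited from the base class
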
4 changes: 2 additions & 2 deletions docs/source/_templates/pipeline_class.rst
@@ -3,14 +3,14 @@
.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}
{% set class_attributes = ['name', 'summary', 'component_graph', 'supported_problem_types', 'model_family', 'hyperparameters', 'custom_hyperparameters'] %}
{% set class_attributes = ['name', 'summary', 'component_graph', 'problem_type', 'model_family', 'hyperparameters', 'custom_hyperparameters'] %}

{% block attributes %}
.. Class attributes:
.. autoattribute:: name
.. autoattribute:: summary
.. autoattribute:: component_graph
.. autoattribute:: supported_problem_types
.. autoattribute:: problem_type
.. autoattribute:: model_family
.. autoattribute:: hyperparameters
.. autoattribute:: custom_hyperparameters
2 changes: 2 additions & 0 deletions docs/source/changelog.rst
@@ -32,6 +32,7 @@ Changelog
* Removed `graphs.py` and moved methods into `PipelineBase` :pr:`657`, :pr:`665`
* Remove s3fs as a dev dependency :pr:`664`
* Changed requirements-parser to be a core dependency :pr:`673`
* Replace `supported_problem_types` field on pipelines with `problem_type` attribute on base classes :pr:`678`
* Documentation Changes
* Fixed some sphinx warnings :pr:`593`
* Fixed docstring for AutoClassificationSearch with correct command :pr:`599`
@@ -62,6 +63,7 @@ Changelog
* ``score()`` will now return one dictionary of all objective scores.
* ``ROC`` and ``ConfusionMatrix`` plot methods via ``Auto(*).plot`` will currently fail due to :pr:`615`
* Pipelines ``_name`` field changed to ``custom_name``
* Pipelines ``supported_problem_types`` field is removed because it is no longer necessary :pr:`678`


**v0.8.0 Apr. 1, 2020**
7 changes: 3 additions & 4 deletions docs/source/pipelines/custom_pipelines.ipynb
@@ -15,7 +15,7 @@
"metadata": {},
"outputs": [],
"source": [
"from evalml.pipelines import PipelineBase\n",
"from evalml.pipelines import MulticlassClassificationPipeline\n",
"from evalml.pipelines.components import StandardScaler, SimpleImputer\n",
"from evalml.pipelines.components.estimators import LogisticRegressionClassifier\n",
"\n",
@@ -24,13 +24,12 @@
"objective = 'Precision_Macro'\n",
"\n",
"\n",
"# pipeline needs to be a subclass of `PipelineBase`\n",
"class CustomPipeline(PipelineBase):\n",
"# the pipeline needs to be a subclass of one of our base pipelines, in this case `MulticlassClassificationPipeline`\n",
"class CustomPipeline(MulticlassClassificationPipeline):\n",
" # component_graph and problem_types are required class variables\n",
" \n",
" # components can be passed in as objects or as component name strings\n",
" component_graph = ['Simple Imputer', StandardScaler(), 'Logistic Regression Classifier']\n",
" supported_problem_types = ['binary', 'multiclass']\n",
"\n",
" # you can override component hyperparameter_ranges like so\n",
" # ranges must adhere to skopt tuner\n",
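For reference, a self-contained sketch of a custom pipeline under the new convention, along the lines of the updated notebook (the hyperparameter override mirrors the one used in the CatBoost regression pipeline below and is illustrative):

    from evalml.pipelines import MulticlassClassificationPipeline
    from evalml.pipelines.components import StandardScaler

    class CustomPipeline(MulticlassClassificationPipeline):
        # component_graph is the only required class variable; problem_type
        # is inherited from MulticlassClassificationPipeline
        component_graph = ['Simple Imputer', StandardScaler(), 'Logistic Regression Classifier']

        # optionally override component hyperparameter ranges (must adhere to the skopt tuner)
        custom_hyperparameters = {
            'impute_strategy': ['most_frequent'],
        }

    pipeline = CustomPipeline(parameters={}, random_state=0)
    print(pipeline.problem_type)  # ProblemTypes.MULTICLASS
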
4 changes: 2 additions & 2 deletions evalml/pipelines/binary_classification_pipeline.py
@@ -10,7 +10,7 @@
class BinaryClassificationPipeline(ClassificationPipeline):

threshold = None
supported_problem_types = ['binary']
problem_type = ProblemTypes.BINARY

def predict(self, X, objective=None):
"""Make predictions using selected features.
@@ -27,7 +27,7 @@ def predict(self, X, objective=None):

if objective is not None:
objective = get_objective(objective)
if objective.problem_type != ProblemTypes.BINARY:
if objective.problem_type != self.problem_type:
raise ValueError("You can only use a binary classification objective to make predictions for a binary classification pipeline.")

if self.threshold is None:
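The guard in predict() now compares the objective's problem type against the pipeline's single problem_type attribute instead of checking membership in a list. A standalone sketch of the same check, using a hypothetical helper rather than the actual method:

    from evalml.problem_types import ProblemTypes

    def check_objective_matches(objective_problem_type, pipeline_problem_type=ProblemTypes.BINARY):
        # Mirrors the check in BinaryClassificationPipeline.predict: the objective
        # must target the same problem type as the pipeline.
        if objective_problem_type != pipeline_problem_type:
            raise ValueError("You can only use a binary classification objective to make "
                             "predictions for a binary classification pipeline.")

    check_objective_matches(ProblemTypes.BINARY)        # passes
    # check_objective_matches(ProblemTypes.MULTICLASS)  # would raise ValueError
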
3 changes: 2 additions & 1 deletion evalml/pipelines/multiclass_classification_pipeline.py
@@ -4,10 +4,11 @@

from evalml.objectives import get_objective
from evalml.pipelines.classification_pipeline import ClassificationPipeline
from evalml.problem_types import ProblemTypes


class MulticlassClassificationPipeline(ClassificationPipeline):
supported_problem_types = ['multiclass']
problem_type = ProblemTypes.MULTICLASS

def score(self, X, y, objectives):
"""Evaluate model performance on current and additional objectives
34 changes: 8 additions & 26 deletions evalml/pipelines/pipeline_base.py
@@ -11,7 +11,6 @@

from evalml.exceptions import IllFormattedClassNameError
from evalml.objectives import get_objective
from evalml.problem_types import handle_problem_types
from evalml.utils import (
Logger,
classproperty,
@@ -36,28 +35,16 @@ def component_graph(cls):
"""
return NotImplementedError("This pipeline must have `component_graph` as a class variable.")

@property
@classmethod
@abstractmethod
def supported_problem_types(cls):
"""Returns a list of ProblemTypes that this pipeline supports

Returns:
list(str/ProblemType): list of ProblemType objects or strings that this pipeline supports
"""
return NotImplementedError("This pipeline must have `supported_problem_types` as a class variable.")

custom_hyperparameters = None
custom_name = None
problem_type = None

def __init__(self, parameters, random_state=0):
"""Machine learning pipeline made out of transformers and a estimator.

Required Class Variables:
component_graph (list): List of components in order. Accepts strings or ComponentBase objects in the list

supported_problem_types (list): List of problem types for this pipeline. Accepts strings or ProbemType enum in the list.

Arguments:
parameters (dict): dictionary with component names as keys and dictionary of that component's parameters as values.
An empty dictionary {} implies using all default values for component parameters.
@@ -67,12 +54,11 @@ def __init__(self, parameters, random_state=0):
self.component_graph = [self._instantiate_component(c, parameters) for c in self.component_graph]
self.input_feature_names = {}
self.results = {}
self.supported_problem_types = [handle_problem_types(problem_type) for problem_type in self.supported_problem_types]
self.estimator = self.component_graph[-1] if isinstance(self.component_graph[-1], Estimator) else None
if self.estimator is None:
raise ValueError("A pipeline must have an Estimator as the last component in component_graph.")

self._validate_problem_types(self.supported_problem_types)
self._validate_estimator_problem_type()

@classproperty
def name(cls):
@@ -111,16 +97,12 @@ def _generate_summary(component_graph):

return _generate_summary(cls.component_graph)

def _validate_problem_types(self, problem_types):
"""Validates provided `problem_types` against the estimator in `self.component_graph`

Arguments:
problem_types (list): list of ProblemTypes
"""
def _validate_estimator_problem_type(self):
"""Validates this pipeline's problem_type against that of the estimator from `self.component_graph`"""
estimator_problem_types = self.estimator.supported_problem_types
for problem_type in self.supported_problem_types:
if problem_type not in estimator_problem_types:
raise ValueError("Problem type {} not valid for this component graph. Valid problem types include {}.".format(problem_type, estimator_problem_types))
if self.problem_type not in estimator_problem_types:
raise ValueError("Problem type {} not valid for this component graph. Valid problem types include {}."
.format(self.problem_type, estimator_problem_types))

def _instantiate_component(self, component, parameters):
"""Instantiates components with parameters in `parameters`"""
@@ -168,7 +150,7 @@ def describe(self):
dict: dictionary of all component parameters if return_dict is True, else None
"""
logger.log_title(self.name)
logger.log("Supported Problem Types: {}".format(', '.join([str(problem_type) for problem_type in self.supported_problem_types])))
logger.log("Problem Type: {}".format(self.problem_type))
logger.log("Model Family: {}".format(str(self.model_family)))

if self.estimator.name in self.input_feature_names:
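The old _validate_problem_types looped over the pipeline's list of problem types; the new _validate_estimator_problem_type reduces to a single membership test against the estimator's supported problem types. An illustrative standalone version of that logic (the example values stand in for a real estimator):

    from evalml.problem_types import ProblemTypes

    def validate_estimator_problem_type(pipeline_problem_type, estimator_problem_types):
        # Same shape as the new PipelineBase._validate_estimator_problem_type:
        # one membership test instead of a loop over supported_problem_types.
        if pipeline_problem_type not in estimator_problem_types:
            raise ValueError("Problem type {} not valid for this component graph. "
                             "Valid problem types include {}.".format(pipeline_problem_type, estimator_problem_types))

    classifier_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]  # e.g. a classifier's supported types
    validate_estimator_problem_type(ProblemTypes.BINARY, classifier_types)        # passes
    # validate_estimator_problem_type(ProblemTypes.REGRESSION, classifier_types)  # would raise ValueError
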
1 change: 0 additions & 1 deletion evalml/pipelines/regression/catboost.py
@@ -11,7 +11,6 @@ class CatBoostRegressionPipeline(RegressionPipeline):
Note: impute_strategy must support both string and numeric data
"""
component_graph = ['Simple Imputer', 'CatBoost Regressor']
supported_problem_types = ['regression']
custom_hyperparameters = {
"impute_strategy": ["most_frequent"],
}
1 change: 0 additions & 1 deletion evalml/pipelines/regression/linear_regression.py
@@ -5,4 +5,3 @@
class LinearRegressionPipeline(RegressionPipeline):
"""Linear Regression Pipeline for regression problems"""
component_graph = ['One Hot Encoder', 'Simple Imputer', 'Standard Scaler', 'Linear Regressor']
supported_problem_types = ['regression']
1 change: 0 additions & 1 deletion evalml/pipelines/regression/random_forest.py
@@ -5,4 +5,3 @@ class RFRegressionPipeline(RegressionPipeline):
"""Random Forest Pipeline for regression problems"""
custom_name = "Random Forest Regression Pipeline"
component_graph = ['One Hot Encoder', 'Simple Imputer', 'RF Regressor Select From Model', 'Random Forest Regressor']
supported_problem_types = ['regression']
3 changes: 2 additions & 1 deletion evalml/pipelines/regression_pipeline.py
@@ -1,5 +1,6 @@
from evalml.pipelines import PipelineBase
from evalml.problem_types import ProblemTypes


class RegressionPipeline(PipelineBase):
supported_problem_types = ['regression']
problem_type = ProblemTypes.REGRESSION
9 changes: 3 additions & 6 deletions evalml/pipelines/utils.py
@@ -70,15 +70,13 @@ def get_pipelines(problem_type, model_families=None):
if model_families is not None and not isinstance(model_families, list):
raise TypeError("model_families parameter is not a list.")

problem_pipelines = []

if model_families:
model_families = [handle_model_family(model_family) for model_family in model_families]

problem_pipelines = []
problem_type = handle_problem_types(problem_type)
for p in all_pipelines():
problem_types = [handle_problem_types(pt) for pt in p.supported_problem_types]
if problem_type in problem_types:
if problem_type == handle_problem_types(p.problem_type):
problem_pipelines.append(p)

if model_families is None:
@@ -111,8 +109,7 @@ def list_model_families(problem_type):
problem_pipelines = []
problem_type = handle_problem_types(problem_type)
for p in all_pipelines():
problem_types = [handle_problem_types(pt) for pt in p.supported_problem_types]
if problem_type in problem_types:
if problem_type == handle_problem_types(p.problem_type):
problem_pipelines.append(p)

return list(set([p.model_family for p in problem_pipelines]))
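Both helpers now filter with a single equality check against each pipeline class's problem_type. A usage sketch, importing from the module shown above (the exact pipelines returned depend on which optional dependencies are installed):

    from evalml.pipelines.utils import get_pipelines, list_model_families

    binary_pipelines = get_pipelines(problem_type='binary')
    print([pipeline.name for pipeline in binary_pipelines])

    print(list_model_families('regression'))
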
5 changes: 2 additions & 3 deletions evalml/tests/pipeline_tests/test_graphs.py
@@ -7,14 +7,13 @@
import pytest
from skopt.space import Real

from evalml.pipelines import PipelineBase
from evalml.pipelines import BinaryClassificationPipeline


@pytest.fixture
def test_pipeline():
class TestPipeline(PipelineBase):
class TestPipeline(BinaryClassificationPipeline):
component_graph = ['Simple Imputer', 'One Hot Encoder', 'Standard Scaler', 'Logistic Regression Classifier']
supported_problem_types = ['binary', 'multiclass']

hyperparameters = {
"penalty": ["l2"],
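With the fixture subclassing BinaryClassificationPipeline, the problem type in tests is inherited rather than declared per class. A small illustrative sketch, reusing the fixture's component names (the assert is not part of the actual test):

    from evalml.pipelines import BinaryClassificationPipeline
    from evalml.problem_types import ProblemTypes

    class TestPipeline(BinaryClassificationPipeline):
        component_graph = ['Simple Imputer', 'One Hot Encoder', 'Standard Scaler',
                           'Logistic Regression Classifier']

    # problem_type is fixed by the base class, so there is no per-pipeline list to keep in sync
    assert TestPipeline.problem_type == ProblemTypes.BINARY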