Skip to content

Commit

Permalink
Adding default_parameters classproperty to components and pipelines. (#…
Browse files Browse the repository at this point in the history
…879)

* Adding default_parameters classproperty to components and pipelines.

* Adding PR 879 to changelog

* Updating test_default_parameters so that classes that cannot be imported are skipped.

* Fixing typo in cannot_check_because_base_or_not_installed

* Fixing documentation and splitting test_default_parameters.py into test_pipelines.py and test_components.py

* Adding DateTimeFeaturization to test_default_parameters.

* Adding default_parameters classproperty to components and pipelines.

* Adding PR 879 to changelog

* Updating test_default_parameters so that classes that cannot be imported are skipped.

* Fixing typo in cannot_check_because_base_or_not_installed

* Fixing documentation and splitting test_default_parameters.py into test_pipelines.py and test_components.py

* Adding DateTimeFeaturization to test_default_parameters.

* Fixing imports in test_components

* Fixing redundant import in test_components.py

* Adding default_parameters to estimator_class, pipeline_class, transformer_class rst files.

* Modifying implementation of default_parameters so that DateTimeFeaturization does not have custom code.
  • Loading branch information
freddyaboulton committed Jun 26, 2020
1 parent 1740209 commit 1daa7b0
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 11 deletions.
4 changes: 3 additions & 1 deletion docs/source/_templates/estimator_class.rst
Expand Up @@ -5,14 +5,16 @@
.. inheritance-diagram:: {{ objname }}

.. autoclass:: {{ objname }}
{% set class_attributes = ['name', 'model_family', 'hyperparameter_ranges', 'supported_problem_types'] %}
{% set class_attributes = ['name', 'model_family', 'hyperparameter_ranges', 'supported_problem_types',
'default_parameters'] %}

{% block attributes %}
.. Class attributes:
.. autoattribute:: name
.. autoattribute:: model_family
.. autoattribute:: supported_problem_types
.. autoattribute:: hyperparameter_ranges
.. autoattribute:: default_parameters
{% endblock %}

{% block instance_attributes %}
Expand Down
5 changes: 4 additions & 1 deletion docs/source/_templates/pipeline_class.rst
Expand Up @@ -5,7 +5,9 @@
.. inheritance-diagram:: {{ objname }}

.. autoclass:: {{ objname }}
{% set class_attributes = ['name', 'custom_name', 'summary', 'component_graph', 'problem_type', 'model_family', 'hyperparameters', 'custom_hyperparameters'] %}
{% set class_attributes = ['name', 'custom_name', 'summary', 'component_graph', 'problem_type',
'model_family', 'hyperparameters', 'custom_hyperparameters',
'default_parameters'] %}


{% block attributes %}
Expand All @@ -19,6 +21,7 @@
.. autoattribute:: model_family
.. autoattribute:: hyperparameters
.. autoattribute:: custom_hyperparameters
.. autoattribute:: default_parameters
{% endblock %}

{% block instance_attributes %}
Expand Down
3 changes: 2 additions & 1 deletion docs/source/_templates/transformer_class.rst
Expand Up @@ -6,13 +6,14 @@

.. autoclass:: {{ objname }}

{% set class_attributes = ['name', 'model_family', 'hyperparameter_ranges'] %}
{% set class_attributes = ['name', 'model_family', 'hyperparameter_ranges', 'default_parameters'] %}

{% block attributes %}
.. Class attributes:
.. autoattribute:: name
.. autoattribute:: model_family
.. autoattribute:: hyperparameter_ranges
.. autoattribute:: default_parameters
{% endblock %}

{% block instance_attributes %}
Expand Down
1 change: 1 addition & 0 deletions docs/source/changelog.rst
Expand Up @@ -18,6 +18,7 @@ Changelog
* Added new utility functions necessary for generating dynamic preprocessing pipelines :pr:`852`
* Added kwargs to all components :pr:`863`
* Added SelectColumns transformer :pr:`873`
* Added `default_parameters` class property to components and pipelines :pr:`879`
* Fixes
* Fixed bug where SimpleImputer cannot handle dropped columns :pr:`846`
* Fixed bug where PerColumnImputer cannot handle dropped columns :pr:`855`
Expand Down
23 changes: 22 additions & 1 deletion evalml/pipelines/components/component_base.py
Expand Up @@ -2,13 +2,19 @@
from abc import ABC, abstractmethod

from evalml.exceptions import MethodPropertyNotFoundError
from evalml.utils import get_logger, get_random_state, log_subtitle
from evalml.utils import (
classproperty,
get_logger,
get_random_state,
log_subtitle
)

logger = get_logger(__file__)


class ComponentBase(ABC):
"""Base class for all components."""
_default_parameters = None

def __init__(self, parameters=None, component_obj=None, random_state=0, **kwargs):
self.random_state = get_random_state(random_state)
Expand All @@ -32,6 +38,21 @@ def parameters(self):
"""Returns the parameters which were used to initialize the component"""
return copy.copy(self._parameters)

@classproperty
def default_parameters(cls):
    """Returns the default parameters for this component.

    Our convention is that Component.default_parameters == Component().parameters.

    The defaults are computed lazily, by instantiating the component with no
    arguments, and are then cached separately on each class.

    Returns:
        dict: default parameters for this component. A shallow copy of the
            cached dictionary is returned, so adding or removing keys on the
            result does not affect later lookups.
    """
    # Inspect only this class's own namespace. A regular attribute lookup
    # (cls._default_parameters) would also find a value cached on a parent
    # class, so a subclass whose __init__ has different defaults would
    # wrongly report its parent's defaults.
    if cls.__dict__.get('_default_parameters') is None:
        cls._default_parameters = cls().parameters

    # Return a copy so that callers cannot mutate the cached dictionary.
    return copy.copy(cls._default_parameters)

def clone(self, random_state=0):
"""Constructs a new component with the same parameters
Expand Down
1 change: 1 addition & 0 deletions evalml/pipelines/components/utils.py
Expand Up @@ -26,6 +26,7 @@
PerColumnImputer,
RFClassifierSelectFromModel,
RFRegressorSelectFromModel,
SelectColumns,
SimpleImputer,
StandardScaler
)
Expand Down
14 changes: 14 additions & 0 deletions evalml/pipelines/pipeline_base.py
Expand Up @@ -270,6 +270,20 @@ def parameters(self):
"""
return {c.name: copy.copy(c.parameters) for c in self.component_graph if c.parameters}

@classproperty
def default_parameters(cls):
    """Returns the default parameter dictionary for this pipeline.

    Each entry of the pipeline's component graph is resolved with
    handle_component_class; components whose default parameters are
    empty (falsy) are left out of the result.

    Returns:
        dict: dictionary of all component default parameters, keyed by component name.
    """
    # Lazy generator: each graph entry is resolved right before it is inspected.
    resolved = (handle_component_class(entry) for entry in cls.component_graph)
    return {
        comp.name: comp.default_parameters
        for comp in resolved
        if comp.default_parameters
    }

@property
def feature_importance(self):
"""Return importance associated with each feature. Features dropped by feature selection are excluded"""
Expand Down
32 changes: 28 additions & 4 deletions evalml/tests/component_tests/test_components.py
Expand Up @@ -10,6 +10,7 @@
from evalml.model_family import ModelFamily
from evalml.pipelines.components import (
ComponentBase,
DateTimeFeaturization,
DropColumns,
ElasticNetClassifier,
ElasticNetRegressor,
Expand All @@ -23,6 +24,7 @@
RandomForestClassifier,
RandomForestRegressor,
RFClassifierSelectFromModel,
SelectColumns,
SimpleImputer,
StandardScaler,
Transformer,
Expand Down Expand Up @@ -441,16 +443,29 @@ def test_transformer_transform_output_type(X_y):
.format(component_class.name, type(X),
X.columns if isinstance(X, pd.DataFrame) else None, type(y),
y.name if isinstance(y, pd.Series) else None))

component = component_class()

component.fit(X, y=y)
transform_output = component.transform(X, y=y)
assert isinstance(transform_output, pd.DataFrame)
assert transform_output.shape == X.shape
assert (transform_output.columns == X_cols_expected).all()

if isinstance(component, SelectColumns):
assert transform_output.shape == (X.shape[0], 0)
assert isinstance(transform_output.columns, pd.Index)
else:
assert transform_output.shape == X.shape
assert (transform_output.columns == X_cols_expected).all()

transform_output = component.fit_transform(X, y=y)
assert isinstance(transform_output, pd.DataFrame)
assert transform_output.shape == X.shape
assert (transform_output.columns == X_cols_expected).all()

if isinstance(component, SelectColumns):
assert transform_output.shape == (X.shape[0], 0)
assert isinstance(transform_output.columns, pd.Index)
else:
assert transform_output.shape == X.shape
assert (transform_output.columns == X_cols_expected).all()


def test_estimator_predict_output_type(X_y):
Expand Down Expand Up @@ -494,3 +509,12 @@ def test_estimator_predict_output_type(X_y):
assert isinstance(predict_proba_output, pd.DataFrame)
assert predict_proba_output.shape == (len(y), len(np.unique(y)))
assert (predict_proba_output.columns == y_cols_expected).all()


components = list(all_components().items()) + [(DateTimeFeaturization.name, DateTimeFeaturization)]


@pytest.mark.parametrize("class_name,cls", components)
def test_default_parameters(class_name, cls):
    """Check that a component's class-level defaults equal the parameters of a no-argument instance."""
    from_class = cls.default_parameters
    from_instance = cls().parameters
    assert from_class == from_instance, f"{class_name}'s default parameters don't match __init__."
6 changes: 3 additions & 3 deletions evalml/tests/component_tests/test_utils.py
Expand Up @@ -14,9 +14,9 @@

def test_all_components(has_minimal_dependencies):
if has_minimal_dependencies:
assert len(all_components()) == 17
assert len(all_components()) == 18
else:
assert len(all_components()) == 21
assert len(all_components()) == 22


def make_mock_import_module(libs_to_exclude):
Expand All @@ -29,7 +29,7 @@ def _import_module(library):

@patch('importlib.import_module', make_mock_import_module({'xgboost', 'catboost'}))
def test_all_components_core_dependencies_mock():
assert len(all_components()) == 17
assert len(all_components()) == 18


def test_handle_component_class_names():
Expand Down
6 changes: 6 additions & 0 deletions evalml/tests/pipeline_tests/test_pipelines.py
Expand Up @@ -866,3 +866,9 @@ def test_clone_fitted(X_y, lr_pipeline):
pipeline_clone.fit(X, y)
X_t_clone = pipeline_clone.predict_proba(X)
pd.testing.assert_frame_equal(X_t, X_t_clone)


@pytest.mark.parametrize("cls", all_pipelines())
def test_pipeline_default_parameters(cls):
    """Check that a pipeline's class-level defaults equal the parameters of a pipeline built with no overrides."""
    from_class = cls.default_parameters
    from_instance = cls({}).parameters
    assert from_class == from_instance, f"{cls.__name__}'s default parameters don't match __init__."

0 comments on commit 1daa7b0

Please sign in to comment.