
Add __str__ and __repr__ for components and pipelines #1218

Merged · 16 commits · Sep 29, 2020
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -8,6 +8,7 @@ Release Notes
* Added `detect_problem_type` to `problem_type/utils.py` to automatically detect the problem type given targets :pr:`1194`
* Added LightGBM to AutoMLSearch :pr:`1199`
* Updates scikit-learn and scikit-optimize to use latest versions - 0.23.2 and 0.8.1 respectively :pr:`1141`
* Added `__str__` and `__repr__` for pipelines and components :pr:`1218`
* Included internal target check for both training and validation data in AutoMLSearch :pr:`1226`
* Add `ProblemTypes.all_problem_types` helper to get list of supported problem types :pr:`1219`
* Added `DecisionTreeClassifier` and `DecisionTreeRegressor` classes :pr:`1223`
10 changes: 9 additions & 1 deletion evalml/pipelines/components/component_base.py
@@ -10,7 +10,8 @@
classproperty,
get_logger,
get_random_state,
log_subtitle
log_subtitle,
safe_repr
)

logger = get_logger(__file__)
@@ -151,3 +152,10 @@ def __eq__(self, other):
if getattr(self, attribute) != getattr(other, attribute):
return False
return True

def __str__(self):
return self.name

def __repr__(self):
parameters_repr = ', '.join([f'{key}={safe_repr(value)}' for key, value in self.parameters.items()])
return f'{(type(self).__name__)}({parameters_repr})'
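
For reference, a minimal illustration of what these new methods produce (hypothetical session; the output is inferred from the Simple Imputer defaults asserted in the tests below, and parameter order follows the component's parameters dict):

from evalml.pipelines.components import SimpleImputer

imputer = SimpleImputer(impute_strategy='median')
print(str(imputer))   # Simple Imputer
print(repr(imputer))  # SimpleImputer(impute_strategy='median', fill_value=None)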
14 changes: 13 additions & 1 deletion evalml/pipelines/pipeline_base.py
@@ -27,7 +27,8 @@
import_or_raise,
jupyter_check,
log_subtitle,
log_title
log_title,
safe_repr
)

logger = get_logger(__file__)
@@ -487,3 +488,14 @@ def __eq__(self, other):
if getattr(self, attribute) != getattr(other, attribute):
return False
return True

def __str__(self):
return self.name

def __repr__(self):

def repr_component(parameters):
return ', '.join([f"'{key}': {safe_repr(value)}" for key, value in parameters.items()])

parameters_repr = ' '.join([f"'{component}':{{{repr_component(parameters)}}}," for component, parameters in self.parameters.items()])
return f'{(type(self).__name__)}(parameters={{{parameters_repr}}})'

Contributor Author (eccabay):
Since the parameters is a dictionary and contains an expression, I do need all three! The first two are for literal curly braces and the third is for the formatting.

Contributor:
Hm interesting, but then the output (taken from the unit test) ends up as

MockPipeline(parameters={{'Imputer':{{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}}, '{final_estimator}':{{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}},}})

But ideally I think it should be

MockPipeline(parameters={'Imputer': {'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}, 'final_estimator': {'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}})

Do you agree? The second output could be evaluated in the python repl and turned into an object. I think the first would fail.

Contributor:
@eccabay but I think it's fine to merge this and we can circle back. It adds value even if there are a couple of details we may wanna discuss further :)

Contributor Author (eccabay):
The double curly braces in the unit tests are once again for f-string formatting! If you actually print the repr, you get the second code block you pasted there, and calling eval on the expected_repr produces the correct object.

Contributor:
@eccabay hmm could you paste an example? I don't follow yet. I thought that __repr__ returns a string, which requires no further formatting, and that's the end of it 😂 In other words, I thought we should output the second snippet, because that can be pasted into the python REPL and evaluated, but we're currently outputting the first.

Again, not blocking merge, I'm just trying to make sure we all agree on desired behavior.

Contributor:
@eccabay ok, I think I understand now. So the string I copied was a format string, which is why you need the double curlys there.

I just did a test locally and everything looks great

[screenshot: local test of the pipeline repr output, 2020-09-29]

Realizing the unit tests were using a format string was the key point I was missing, lol. Thanks! 😆
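
To spell out the point this thread settles on, a standalone illustration (not code from this PR) of why the expected_repr strings in the tests carry doubled braces:

final_estimator = 'Random Forest Classifier'

# In an f-string, '{{' and '}}' are escapes that render as single literal
# braces, while {final_estimator} is substituted with the variable's value.
expected = f"{{'{final_estimator}': {{'n_jobs': -1}}}}"

print(expected)  # {'Random Forest Classifier': {'n_jobs': -1}}
assert eval(expected) == {'Random Forest Classifier': {'n_jobs': -1}}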

79 changes: 73 additions & 6 deletions evalml/tests/component_tests/test_components.py
@@ -16,13 +16,21 @@
)
from evalml.model_family import ModelFamily
from evalml.pipelines.components import (
LSA,
BaselineClassifier,
BaselineRegressor,
CatBoostClassifier,
CatBoostRegressor,
ComponentBase,
DateTimeFeaturizer,
DropColumns,
DropNullColumns,
ElasticNetClassifier,
ElasticNetRegressor,
Estimator,
ExtraTreesClassifier,
ExtraTreesRegressor,
Imputer,
LightGBMClassifier,
LinearRegressor,
LogisticRegressionClassifier,
@@ -31,11 +39,14 @@
RandomForestClassifier,
RandomForestRegressor,
RFClassifierSelectFromModel,
RFRegressorSelectFromModel,
SelectColumns,
SimpleImputer,
StandardScaler,
TextFeaturizer,
Transformer,
XGBoostClassifier
XGBoostClassifier,
XGBoostRegressor
)
from evalml.pipelines.components.utils import (
_all_estimators,
@@ -114,23 +125,40 @@ def test_describe(test_classes):

def test_describe_component():
enc = OneHotEncoder()
imputer = SimpleImputer("mean")
imputer = Imputer()
simple_imputer = SimpleImputer("mean")
column_imputer = PerColumnImputer({"a": "mean", "b": ("constant", 100)})
scaler = StandardScaler()
feature_selection = RFClassifierSelectFromModel(n_estimators=10, number_features=5, percent_features=0.3, threshold=-np.inf)
feature_selection_clf = RFClassifierSelectFromModel(n_estimators=10, number_features=5, percent_features=0.3, threshold=-np.inf)
feature_selection_reg = RFRegressorSelectFromModel(n_estimators=10, number_features=5, percent_features=0.3, threshold=-np.inf)
drop_col_transformer = DropColumns(columns=['col_one', 'col_two'])
drop_null_transformer = DropNullColumns()
datetime = DateTimeFeaturizer()
text_featurizer = TextFeaturizer()
lsa = LSA()
assert enc.describe(return_dict=True) == {'name': 'One Hot Encoder', 'parameters': {'top_n': 10,
'categories': None,
'drop': None,
'handle_unknown': 'ignore',
'handle_missing': 'error'}}
drop_col_transformer = DropColumns(columns=['col_one', 'col_two'])
assert imputer.describe(return_dict=True) == {'name': 'Simple Imputer', 'parameters': {'impute_strategy': 'mean', 'fill_value': None}}
assert imputer.describe(return_dict=True) == {'name': 'Imputer', 'parameters': {'categorical_impute_strategy': "most_frequent",
                                              'categorical_fill_value': None,
                                              'numeric_impute_strategy': "mean",
                                              'numeric_fill_value': None}}

Contributor:

lol thanks for this! I wonder if there's a way to automate this since it's clear that we missed quite a few of our new components (maybe using all_components()? — see the sketch after this file's diff)
assert simple_imputer.describe(return_dict=True) == {'name': 'Simple Imputer', 'parameters': {'impute_strategy': 'mean', 'fill_value': None}}
assert column_imputer.describe(return_dict=True) == {'name': 'Per Column Imputer', 'parameters': {'impute_strategies': {'a': 'mean', 'b': ('constant', 100)}, 'default_impute_strategy': 'most_frequent'}}
assert scaler.describe(return_dict=True) == {'name': 'Standard Scaler', 'parameters': {}}
assert feature_selection.describe(return_dict=True) == {'name': 'RF Classifier Select From Model', 'parameters': {'number_features': 5, 'n_estimators': 10, 'max_depth': None, 'percent_features': 0.3, 'threshold': -np.inf, 'n_jobs': -1}}
assert feature_selection_clf.describe(return_dict=True) == {'name': 'RF Classifier Select From Model', 'parameters': {'number_features': 5, 'n_estimators': 10, 'max_depth': None, 'percent_features': 0.3, 'threshold': -np.inf, 'n_jobs': -1}}
assert feature_selection_reg.describe(return_dict=True) == {'name': 'RF Regressor Select From Model', 'parameters': {'number_features': 5, 'n_estimators': 10, 'max_depth': None, 'percent_features': 0.3, 'threshold': -np.inf, 'n_jobs': -1}}
assert drop_col_transformer.describe(return_dict=True) == {'name': 'Drop Columns Transformer', 'parameters': {'columns': ['col_one', 'col_two']}}
assert drop_null_transformer.describe(return_dict=True) == {'name': 'Drop Null Columns Transformer', 'parameters': {'pct_null_threshold': 1.0}}
assert datetime.describe(return_dict=True) == {'name': 'DateTime Featurization Component', 'parameters': {'features_to_extract': ['year', 'month', 'day_of_week', 'hour']}}
assert text_featurizer.describe(return_dict=True) == {'name': 'Text Featurization Component', 'parameters': {'text_columns': None}}
assert lsa.describe(return_dict=True) == {'name': 'LSA Transformer', 'parameters': {'text_columns': None}}

# testing estimators
base_classifier = BaselineClassifier()
base_regressor = BaselineRegressor()
lr_classifier = LogisticRegressionClassifier()
en_classifier = ElasticNetClassifier()
en_regressor = ElasticNetRegressor()
@@ -139,6 +167,8 @@ def test_describe_component():
rf_classifier = RandomForestClassifier(n_estimators=10, max_depth=3)
rf_regressor = RandomForestRegressor(n_estimators=10, max_depth=3)
linear_regressor = LinearRegressor()
assert base_classifier.describe(return_dict=True) == {'name': 'Baseline Classifier', 'parameters': {'strategy': 'mode'}}
assert base_regressor.describe(return_dict=True) == {'name': 'Baseline Regressor', 'parameters': {'strategy': 'mean'}}
assert lr_classifier.describe(return_dict=True) == {'name': 'Logistic Regression Classifier', 'parameters': {'penalty': 'l2', 'C': 1.0, 'n_jobs': -1, 'multi_class': 'auto', 'solver': 'lbfgs'}}
assert en_classifier.describe(return_dict=True) == {'name': 'Elastic Net Classifier', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'n_jobs': -1, 'max_iter': 1000, "loss": 'log', 'penalty': 'elasticnet'}}
assert en_regressor.describe(return_dict=True) == {'name': 'Elastic Net Regressor', 'parameters': {'alpha': 0.5, 'l1_ratio': 0.5, 'max_iter': 1000, 'normalize': False}}
@@ -149,7 +179,16 @@ def test_describe_component():
assert linear_regressor.describe(return_dict=True) == {'name': 'Linear Regressor', 'parameters': {'fit_intercept': True, 'normalize': False, 'n_jobs': -1}}
try:
xgb_classifier = XGBoostClassifier(eta=0.1, min_child_weight=1, max_depth=3, n_estimators=75)
xgb_regressor = XGBoostRegressor(eta=0.1, min_child_weight=1, max_depth=3, n_estimators=75)
assert xgb_classifier.describe(return_dict=True) == {'name': 'XGBoost Classifier', 'parameters': {'eta': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 75}}
assert xgb_regressor.describe(return_dict=True) == {'name': 'XGBoost Regressor', 'parameters': {'eta': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 75}}
except ImportError:
pass
try:
cb_classifier = CatBoostClassifier()
cb_regressor = CatBoostRegressor()
assert cb_classifier.describe(return_dict=True) == {'name': 'CatBoost Classifier', 'parameters': {'allow_writing_files': False, 'n_estimators': 10, 'eta': 0.03, 'max_depth': 6, 'bootstrap_type': None, 'silent': True}}
assert cb_regressor.describe(return_dict=True) == {'name': 'CatBoost Regressor', 'parameters': {'allow_writing_files': False, 'n_estimators': 10, 'eta': 0.03, 'max_depth': 6, 'bootstrap_type': None, 'silent': False}}
except ImportError:
pass
try:
@@ -855,6 +894,34 @@ def test_component_equality_all_components(component_class):
assert component == equal_component


def test_mock_component_str(test_classes):
MockComponent, MockEstimator, MockTransformer = test_classes

assert str(MockComponent()) == 'Mock Component'
assert str(MockEstimator()) == 'Mock Estimator'
assert str(MockTransformer()) == 'Mock Transformer'


def test_mock_component_repr():
component = MockFitComponent()
assert repr(component) == 'MockFitComponent(param_a=2, param_b=10)'

component_with_params = MockFitComponent(param_a=29, param_b=None, random_state=42)
assert repr(component_with_params) == 'MockFitComponent(param_a=29, param_b=None)'

component_with_nan = MockFitComponent(param_a=np.nan, param_b=float('nan'))
assert repr(component_with_nan) == 'MockFitComponent(param_a=np.nan, param_b=np.nan)'

component_with_inf = MockFitComponent(param_a=np.inf, param_b=float('-inf'))
assert repr(component_with_inf) == "MockFitComponent(param_a=float('inf'), param_b=float('-inf'))"


@pytest.mark.parametrize("component_class", all_components())
def test_component_str(component_class):
component = component_class()
assert str(component) == component.name


@pytest.mark.parametrize("categorical", [{
"type": Categorical(["mean", "median", "mode"]),
"categories": Categorical(["blue", "green"])
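Picking up the reviewer's suggestion above about automating the describe coverage, a hypothetical sketch (not part of this PR; it assumes every component can be constructed with defaults and that describe mirrors name and parameters):

@pytest.mark.parametrize("component_class", all_components())
def test_describe_all_components(component_class):
    # Hypothetical smoke test: describe(return_dict=True) should report each
    # component's name and default parameters without hand-written fixtures.
    component = component_class()
    assert component.describe(return_dict=True) == {'name': component.name,
                                                    'parameters': component.parameters}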
51 changes: 51 additions & 0 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -1267,3 +1267,54 @@ def test_pipeline_equality_different_fitted_data(problem_type, X_y_binary, X_y_m
pipeline_diff_data.fit(X, y)

assert pipeline != pipeline_diff_data


def test_pipeline_str():

class MockBinaryPipeline(BinaryClassificationPipeline):
name = "Mock Binary Pipeline"
component_graph = ['Imputer', 'Random Forest Classifier']

class MockMulticlassPipeline(MulticlassClassificationPipeline):
name = "Mock Multiclass Pipeline"
component_graph = ['Imputer', 'Random Forest Classifier']

class MockRegressionPipeline(RegressionPipeline):
name = "Mock Regression Pipeline"
component_graph = ['Imputer', 'Random Forest Regressor']

binary_pipeline = MockBinaryPipeline(parameters={})
multiclass_pipeline = MockMulticlassPipeline(parameters={})
regression_pipeline = MockRegressionPipeline(parameters={})

assert str(binary_pipeline) == "Mock Binary Pipeline"
assert str(multiclass_pipeline) == "Mock Multiclass Pipeline"
assert str(regression_pipeline) == "Mock Regression Pipeline"


@pytest.mark.parametrize("pipeline_class", [BinaryClassificationPipeline, MulticlassClassificationPipeline, RegressionPipeline])
def test_pipeline_repr(pipeline_class):
if pipeline_class in [BinaryClassificationPipeline, MulticlassClassificationPipeline]:
final_estimator = 'Random Forest Classifier'
else:
final_estimator = 'Random Forest Regressor'

class MockPipeline(pipeline_class):
name = "Mock Pipeline"
component_graph = ['Imputer', final_estimator]

pipeline = MockPipeline(parameters={})
expected_repr = f"MockPipeline(parameters={{'Imputer':{{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}}, '{final_estimator}':{{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}},}})"

Contributor:
@eccabay I just noticed: can we replace '{final_estimator}': with 'final_estimator':?

Contributor:
Doesn't block merge though IMO! Just a detail we should probably chase down

Contributor Author (eccabay):
Unfortunately the way these tests are written, we can't. The string __repr__ prints out uses the name of the estimator, so checking for string equality will fail. If I replace {final_estimator} with final_estimator and try assert eval(repr(pipeline)) == eval(expected_repr), that will pass, but that will reintroduce eval to the code.

The alternative of having __repr__ output final_estimator instead of the name of said estimator feels unnecessarily clunky.

Contributor:
Oh! My bad, I didn't notice this was a format string until just now. Got it, makes sense. Thanks

Contributor Author (eccabay):
Wonderful, glad we're on the same page 😅. Does this also clear up our other discussion?

assert repr(pipeline) == expected_repr

pipeline_with_parameters = MockPipeline(parameters={'Imputer': {'numeric_fill_value': 42}})
expected_repr = f"MockPipeline(parameters={{'Imputer':{{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': 42}}, '{final_estimator}':{{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}},}})"
assert repr(pipeline_with_parameters) == expected_repr

pipeline_with_inf_parameters = MockPipeline(parameters={'Imputer': {'numeric_fill_value': float('inf'), 'categorical_fill_value': np.inf}})
expected_repr = f"MockPipeline(parameters={{'Imputer':{{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': float('inf'), 'numeric_fill_value': float('inf')}}, '{final_estimator}':{{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}},}})"
assert repr(pipeline_with_inf_parameters) == expected_repr

pipeline_with_nan_parameters = MockPipeline(parameters={'Imputer': {'numeric_fill_value': float('nan'), 'categorical_fill_value': np.nan}})
expected_repr = f"MockPipeline(parameters={{'Imputer':{{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': np.nan, 'numeric_fill_value': np.nan}}, '{final_estimator}':{{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}},}})"
assert repr(pipeline_with_nan_parameters) == expected_repr
2 changes: 1 addition & 1 deletion evalml/utils/__init__.py
@@ -1,3 +1,3 @@
from .logger import get_logger, log_subtitle, log_title
from .gen_utils import classproperty, import_or_raise, convert_to_seconds, get_random_state, check_random_state_equality, get_random_seed, SEED_BOUNDS, jupyter_check
from .gen_utils import classproperty, import_or_raise, convert_to_seconds, get_random_state, check_random_state_equality, get_random_seed, SEED_BOUNDS, jupyter_check, safe_repr
from .cli_utils import print_info, get_evalml_root, get_installed_packages, get_sys_info, print_sys_info, print_deps
18 changes: 18 additions & 0 deletions evalml/utils/gen_utils.py
@@ -3,6 +3,7 @@
from collections import namedtuple

import numpy as np
import pandas as pd
from sklearn.utils import check_random_state

from evalml.exceptions import MissingComponentError
@@ -231,3 +232,20 @@ def jupyter_check():
return True
except NameError:
return False


def safe_repr(value):
"""Convert the given value into a string that can safely be used for repr

Arguments:
value: the item to convert

Returns:
String representation of the value
"""
if isinstance(value, float):
if pd.isna(value):
return 'np.nan'
if np.isinf(value):
return f"float('{repr(value)}')"
return repr(value)
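
Usage, for reference — each expected string follows from the branches above:

import numpy as np

safe_repr(np.nan)         # 'np.nan'
safe_repr(float('inf'))   # "float('inf')"
safe_repr(float('-inf'))  # "float('-inf')"
safe_repr('mean')         # "'mean'"
safe_repr(None)           # 'None'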