alteryx · angela97lin · Apr 24, 2020 · Apr 24, 2020 · Apr 24, 2020 · Apr 24, 2020
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -19,6 +19,7 @@ Changelog
         * Update make_pipeline_graph to not accidentally create empty file when testing if path is valid :pr:`649`
         * Fix pip installation warning about docsutils version, from boto dependency :pr:`664`
         * Removed zero division warning for F1/precision/recall metrics :pr:`671`
+        * Fixed ``summary`` for pipelines without estimators :pr:`707`
     * Changes
         * Updated default objective for binary/multiseries classification to log loss :pr:`613`
         * Created classification and regression pipeline subclasses and removed objective as an attribute of pipeline classes :pr:`405`

diff --git a/docs/source/objectives/custom_objectives.ipynb b/docs/source/objectives/custom_objectives.ipynb
@@ -36,7 +36,6 @@
     "    name = \"Fraud Cost\"\n",
     "    needs_fitting = True\n",
     "    greater_is_better = False\n",
-    "    uses_extra_columns = True\n",
     "    score_needs_proba = False\n",
     "\n",
     "    def __init__(self, retry_percentage=.5, interchange_fee=.02,\n",
@@ -63,35 +62,7 @@
     "\n",
     "    def decision_function(self, y_predicted, extra_cols, threshold):\n",
     "        \"\"\"Determine if transaction is fraud given predicted probabilities,\n",
-    "            dataframe with transaction amount, and threshold\"\"\"\n",
-    "\n",
-    "        transformed_probs = (y_predicted * extra_cols[self.amount_col])\n",
-    "        return transformed_probs > threshold\n",
-    "\n",
-    "    def objective_function(self, y_predicted, y_true, extra_cols):\n",
-    "        \"\"\"Calculate amount lost to fraud given predictions, true values, and dataframe\n",
-    "            with transaction amount\"\"\"\n",
-    "\n",
-    "        # extract transaction using the amount columns in users data\n",
-    "        transaction_amount = extra_cols[self.amount_col]\n",
-    "\n",
-    "        # amount paid if transaction is fraud\n",
-    "        fraud_cost = transaction_amount * self.fraud_payout_percentage\n",
-    "\n",
-    "        # money made from interchange fees on transaction\n",
-    "        interchange_cost = transaction_amount * (1 - self.retry_percentage) * self.interchange_fee\n",
-    "\n",
-    "        # calculate cost of missing fraudulent transactions\n",
-    "        false_negatives = (y_true & ~y_predicted) * fraud_cost\n",
-    "\n",
-    "        # calculate money lost from fees\n",
-    "        false_positives = (~y_true & y_predicted) * interchange_cost\n",
-    "\n",
-    "        loss = false_negatives.sum() + false_positives.sum()\n",
-    "\n",
-    "        loss_per_total_processed = loss / transaction_amount.sum()\n",
-    "\n",
-    "        return loss_per_total_processed\n"
+    "           "
    ]
   }
  ],
@@ -116,4 +87,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/evalml/pipelines/pipeline_base.py b/evalml/pipelines/pipeline_base.py
@@ -79,23 +79,19 @@ def summary(cls):
         """Returns a short summary of the pipeline structure, describing the list of components used.
         Example: Logistic Regression Classifier w/ Simple Imputer + One Hot Encoder
         """
-        def _generate_summary(component_graph):
-            component_graph = copy.copy(component_graph)
-            component_graph[-1] = handle_component(component_graph[-1])
-            estimator = component_graph[-1] if isinstance(component_graph[-1], Estimator) else None
-            if estimator is not None:
-                summary = "{}".format(estimator.name)
-            else:
-                summary = "Pipeline"
-            for index, component in enumerate(component_graph[:-1]):
-                component = handle_component(component)
-                if index == 0:
-                    summary += " w/ {}".format(component.name)
-                else:
-                    summary += " + {}".format(component.name)
+        component_graph = copy.copy(cls.component_graph)
+        if len(component_graph) == 0:
+            return ""
+        summary = "Pipeline"
+        component_graph[-1] = handle_component(component_graph[-1])
+
+        if isinstance(component_graph[-1], Estimator):
+            estimator = component_graph.pop()
+            summary = estimator.name
+        if len(component_graph) == 0:
             return summary
-
-        return _generate_summary(cls.component_graph)
+        component_names = [handle_component(component).name for component in component_graph]
+        return '{} w/ {}'.format(summary, ' + '.join(component_names))
 
     def _validate_estimator_problem_type(self):
         """Validates this pipeline's problem_type against that of the estimator from `self.component_graph`"""

diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_catboost_classification.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_catboost_classification.py
@@ -32,6 +32,7 @@ def test_catboost_init():
     clf = CatBoostBinaryClassificationPipeline(parameters=parameters, random_state=2)
     assert clf.parameters == parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0])
+    assert clf.summary == 'CatBoost Classifier w/ Simple Imputer'
 
 
 def test_catboost_objective_tuning(X_y):

diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_logistic_regression.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_logistic_regression.py
@@ -31,6 +31,7 @@ def test_lor_init(X_y):
     clf = LogisticRegressionBinaryPipeline(parameters=parameters, random_state=1)
     assert clf.parameters == parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(1).get_state()[0])
+    assert clf.summary == 'Logistic Regression Classifier w/ One Hot Encoder + Simple Imputer + Standard Scaler'
 
 
 def test_lor_objective_tuning(X_y):

diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_rf.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_rf.py
@@ -53,6 +53,7 @@ def test_rf_init(X_y):
 
     assert clf.parameters == expected_parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0])
+    assert clf.summary == 'Random Forest Classifier w/ One Hot Encoder + Simple Imputer + RF Classifier Select From Model'
 
 
 def test_rf_objective_tuning(X_y):

diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_xgboost_classification.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_xgboost_classification.py
@@ -66,6 +66,7 @@ def test_xg_init(X_y):
 
     assert clf.parameters == expected_parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(1).get_state()[0])
+    assert clf.summary == 'XGBoost Classifier w/ One Hot Encoder + Simple Imputer + RF Classifier Select From Model'
 
 
 def test_xgboost_objective_tuning(X_y):

diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_catboost_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_catboost_regression.py
@@ -27,6 +27,7 @@ def test_catboost_init():
     clf = CatBoostRegressionPipeline(parameters=parameters, random_state=2)
     assert clf.parameters == parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0])
+    assert clf.summary == 'CatBoost Regressor w/ Simple Imputer'
 
 
 def test_catboost_regression(X_y_reg):

diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_linear_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_linear_regression.py
@@ -27,6 +27,7 @@ def test_lr_init(X_y_categorical_regression):
     clf = LinearRegressionPipeline(parameters=parameters, random_state=2)
     assert clf.parameters == parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0])
+    assert clf.summary == 'Linear Regressor w/ One Hot Encoder + Simple Imputer + Standard Scaler'
 
 
 def test_linear_regression(X_y_categorical_regression):

diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_rf_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_rf_regression.py
@@ -49,6 +49,7 @@ def test_rf_init(X_y_reg):
 
     assert clf.parameters == expected_parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(2).get_state()[0])
+    assert clf.summary == 'Random Forest Regressor w/ One Hot Encoder + Simple Imputer + RF Regressor Select From Model'
 
 
 def test_rf_regression(X_y_categorical_regression):

diff --git a/evalml/tests/pipeline_tests/regression_pipeline_tests/test_xgboost_regression.py b/evalml/tests/pipeline_tests/regression_pipeline_tests/test_xgboost_regression.py
@@ -41,6 +41,7 @@ def test_xg_init(X_y_reg):
 
     assert clf.parameters == parameters
     assert (clf.random_state.get_state()[0] == np.random.RandomState(1).get_state()[0])
+    assert clf.summary == 'XGBoost Regressor w/ One Hot Encoder + Simple Imputer + RF Regressor Select From Model'
 
 
 def test_xgboost_regression(X_y_reg):

diff --git a/evalml/tests/pipeline_tests/test_pipelines.py b/evalml/tests/pipeline_tests/test_pipelines.py
@@ -243,13 +243,6 @@ class testillformattednamepipeline(BinaryClassificationPipeline):
         testillformattednamepipeline.name == "Test Illformatted Name Pipeline"
 
 
-def test_summary(X_y, lr_pipeline):
-    X, y = X_y
-    clf = lr_pipeline
-    assert clf.summary == 'Logistic Regression Classifier w/ One Hot Encoder + Simple Imputer + Standard Scaler'
-    assert LogisticRegressionBinaryPipeline.summary == 'Logistic Regression Classifier w/ One Hot Encoder + Simple Imputer + Standard Scaler'
-
-
 def test_estimator_not_last(X_y):
     X, y = X_y
 
@@ -470,3 +463,25 @@ def test_score_with_objective_that_requires_predict_proba(mock_predict, dummy_re
     with pytest.raises(ValueError, match="Objective `AUC` does not support score_needs_proba"):
         dummy_regression_pipeline.score(X, y, ['recall', 'auc'])
     mock_predict.assert_called()
+
+
+def test_pipeline_summary():  # checks the class-level `summary` string for several component_graph shapes
+    class MockPipelineWithoutEstimator(PipelineBase):  # last component is not an estimator -> generic "Pipeline" label
+        component_graph = ["Simple Imputer", "One Hot Encoder"]
+    assert MockPipelineWithoutEstimator.summary == "Pipeline w/ Simple Imputer + One Hot Encoder"
+
+    class MockPipelineWithSingleComponent(PipelineBase):  # a single non-estimator component
+        component_graph = ["Simple Imputer"]
+    assert MockPipelineWithSingleComponent.summary == "Pipeline w/ Simple Imputer"
+
+    class MockPipelineWithSingleEstimator(PipelineBase):  # estimator only -> just its name, no " w/ ..." suffix
+        component_graph = ["Random Forest Classifier"]
+    assert MockPipelineWithSingleEstimator.summary == "Random Forest Classifier"
+
+    class MockPipelineWithNoComponents(PipelineBase):  # empty graph -> empty summary string
+        component_graph = []
+    assert MockPipelineWithNoComponents.summary == ""
+
+    class MockPipeline(PipelineBase):  # estimator last -> its name leads, other components joined with " + "
+        component_graph = ["Simple Imputer", "One Hot Encoder", "Random Forest Classifier"]
+    assert MockPipeline.summary == "Random Forest Classifier w/ Simple Imputer + One Hot Encoder"