Partial dependence for multiclass #1554

Merged: 7 commits, Dec 16, 2020
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -16,6 +16,7 @@ Release Notes
* Add woodwork support for more utility and graph methods :pr:`1544`
* Changed ``DateTimeFeaturizer`` to encode features as int :pr:`1479`
* Added `Linear Discriminant Analysis Transformer` component for dimensionality reduction :pr:`1331`
* Added multiclass support for ``partial_dependence`` and ``graph_partial_dependence`` :pr:`1554`
* Fixes
* Fix Windows CI jobs: install ``numba`` via conda, required for ``shap`` :pr:`1490`
* Added custom-index support for `reset-index-get_prediction_vs_actual_over_time_data` :pr:`1494`
67 changes: 57 additions & 10 deletions evalml/model_understanding/graphs.py
@@ -439,7 +439,12 @@ def partial_dependence(pipeline, X, feature, grid_resolution=100):

Returns:
pd.DataFrame: DataFrame with averaged predictions for all points in the grid averaged
over all samples of X and the values used to calculate those predictions.
over all samples of X and the values used to calculate those predictions. The dataframe will
contain two columns: "feature_values" (grid points at which the partial dependence was calculated) and
"partial_dependence" (the partial dependence at that feature value). For classification problems, there
will be a third column called "class_label" (the class label for which the partial
dependence was calculated). For binary classification, the partial dependence is only calculated for the
"positive" class.

"""
X = _convert_to_woodwork_structure(X)
@@ -462,11 +467,21 @@ def partial_dependence(pipeline, X, feature, grid_resolution=100):
# Delete scikit-learn attributes that were temporarily set
del pipeline._estimator_type
del pipeline.feature_importances_
return pd.DataFrame({"feature_values": values[0],
"partial_dependence": avg_pred[0]})
classes = None
if isinstance(pipeline, evalml.pipelines.BinaryClassificationPipeline):
classes = [pipeline.classes_[1]]
elif isinstance(pipeline, evalml.pipelines.MulticlassClassificationPipeline):
classes = pipeline.classes_

data = pd.DataFrame({"feature_values": np.tile(values[0], avg_pred.shape[0]),
"partial_dependence": np.concatenate([pred for pred in avg_pred])})
Contributor

Neat!

if classes is not None:
data['class_label'] = np.repeat(classes, len(values[0]))
Contributor

Nit-pick: Since we're changing the output to return this new field in the DF, could be good to update this docstring too?

Contributor Author

Good suggestion! Done!

return data
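
For illustration only (not part of this diff): a minimal sketch of the long-format frame partial_dependence now returns for a multiclass pipeline, assuming `pipeline` is any fitted evalml MulticlassClassificationPipeline with three classes, `X` holds its input features, and the chosen feature has at least 20 unique values.

from evalml.model_understanding.graphs import partial_dependence

# 20 grid points per class; the per-class rows are stacked in the order of pipeline.classes_
part_dep = partial_dependence(pipeline, X, feature=0, grid_resolution=20)

print(list(part_dep.columns))                  # ['feature_values', 'partial_dependence', 'class_label']
print(len(part_dep))                           # 60 rows: 20 grid points x 3 classes
print(part_dep['class_label'].value_counts())  # each class label appears 20 times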

def graph_partial_dependence(pipeline, X, feature, grid_resolution=100):

def graph_partial_dependence(pipeline, X, feature, class_label=None, grid_resolution=100):
"""Create a one-way partial dependence plot.

Arguments:
@@ -476,6 +491,10 @@ def graph_partial_dependence(pipeline, X, feature, grid_resolution=100):
feature (int, string): The target feature for which to create the partial dependence plot for.
If feature is an int, it must be the index of the feature to use.
If feature is a string, it must be a valid column name in X.
class_label (string, optional): Name of class to plot for multiclass problems. If None, will plot
the partial dependence for each class. This argument does not change behavior for regression or binary
classification pipelines. For binary classification, the partial dependence for the positive label will
always be displayed. Defaults to None.

Returns:
pd.DataFrame: pd.DataFrame with averaged predictions for all points in the grid averaged
@@ -485,19 +504,47 @@
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_raise("ipywidgets", warning=True)
if isinstance(pipeline, evalml.pipelines.MulticlassClassificationPipeline) and class_label is not None:
if class_label not in pipeline.classes_:
msg = f"Class {class_label} is not one of the classes the pipeline was fit on: {', '.join(list(pipeline.classes_))}"
raise ValueError(msg)

part_dep = partial_dependence(pipeline, X, feature=feature, grid_resolution=grid_resolution)
feature_name = str(feature)
title = f"Partial Dependence of '{feature_name}'"
layout = _go.Layout(title={'text': title},
xaxis={'title': f'{feature_name}', 'range': _calculate_axis_range(part_dep['feature_values'])},
yaxis={'title': 'Partial Dependence', 'range': _calculate_axis_range(part_dep['partial_dependence'])})
data = []
data.append(_go.Scatter(x=part_dep['feature_values'],
xaxis={'title': f'{feature_name}'},
yaxis={'title': 'Partial Dependence'},
showlegend=False)
if isinstance(pipeline, evalml.pipelines.MulticlassClassificationPipeline):
class_labels = [class_label] if class_label is not None else pipeline.classes_
_subplots = import_or_raise("plotly.subplots", error_msg="Cannot find dependency plotly.subplots")

# If the user passes in a value for class_label, we want to create a 1 x 1 subplot or else there would
# be an empty column in the plot and it would look awkward
rows, cols = ((len(class_labels) + 1) // 2, 2) if len(class_labels) > 1 else (1, len(class_labels))

# Don't specify shared_xaxes or shared_yaxes so that we get tick marks in each subplot
fig = _subplots.make_subplots(rows=rows, cols=cols, subplot_titles=class_labels)
for i, label in enumerate(class_labels):

# Plotly subplot rows and columns are 1-indexed, so offset i when computing the position
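# (worked example, not from the original diff: with four classes, i = 0..3
#  maps to (row, col) = (1, 1), (1, 2), (2, 1), (2, 2))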
fig.add_trace(_go.Scatter(x=part_dep.loc[part_dep.class_label == label, 'feature_values'],
y=part_dep.loc[part_dep.class_label == label, 'partial_dependence'],
line=dict(width=3),
name=label),
row=(i + 2) // 2, col=(i % 2) + 1)
Contributor

Nice!

fig.update_layout(layout)
fig.update_xaxes(title=f'{feature_name}', range=_calculate_axis_range(part_dep['feature_values']))
fig.update_yaxes(range=_calculate_axis_range(part_dep['partial_dependence']))
else:
trace = _go.Scatter(x=part_dep['feature_values'],
y=part_dep['partial_dependence'],
name='Partial Dependence',
line=dict(width=3)))
return _go.Figure(layout=layout, data=data)
line=dict(width=3))
fig = _go.Figure(layout=layout, data=[trace])

return fig
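
A usage sketch (not part of this diff), assuming `pipeline` is a fitted evalml MulticlassClassificationPipeline whose classes include 'class_1' and `X` holds its input features:

from evalml.model_understanding.graphs import graph_partial_dependence

# One subplot per class label, laid out two columns wide
fig_all = graph_partial_dependence(pipeline, X, feature=0, grid_resolution=20)

# Plot a single class; regression and binary pipelines are unaffected by class_label
fig_one = graph_partial_dependence(pipeline, X, feature=0, class_label='class_1', grid_resolution=20)
fig_one.show()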


def _calculate_axis_range(arr):
94 changes: 71 additions & 23 deletions evalml/tests/model_understanding_tests/test_graphs.py
@@ -11,7 +11,7 @@
from sklearn.preprocessing import label_binarize
from skopt.space import Real

from evalml.demos import load_breast_cancer
from evalml.demos import load_breast_cancer, load_wine
from evalml.exceptions import NullsInColumnWarning
from evalml.model_family import ModelFamily
from evalml.model_understanding.graphs import (
@@ -39,6 +39,7 @@
from evalml.objectives import CostBenefitMatrix
from evalml.pipelines import (
BinaryClassificationPipeline,
ClassificationPipeline,
MulticlassClassificationPipeline,
RegressionPipeline
)
@@ -703,6 +704,19 @@ def test_graph_binary_objective_vs_threshold(mock_cb_thresholds, data_type, X_y_
assert np.array_equal(data['y'], mock_cb_thresholds.return_value['score'])


def check_partial_dependence_dataframe(pipeline, part_dep, grid_size=20):
columns = ["feature_values", "partial_dependence"]
if isinstance(pipeline, ClassificationPipeline):
columns.append("class_label")
n_rows_for_class = len(pipeline.classes_) if isinstance(pipeline, MulticlassClassificationPipeline) else 1
assert list(part_dep.columns) == columns
assert len(part_dep["partial_dependence"]) == grid_size * n_rows_for_class
assert len(part_dep["feature_values"]) == grid_size * n_rows_for_class
if isinstance(pipeline, ClassificationPipeline):
per_class_counts = part_dep['class_label'].value_counts()
assert all(value == grid_size for value in per_class_counts.values)


@pytest.mark.parametrize("data_type", ["np", "pd", "ww"])
@pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS, ProblemTypes.REGRESSION])
def test_partial_dependence_problem_types(data_type, problem_type, X_y_binary, X_y_multi, X_y_regression,
@@ -728,9 +742,7 @@ def test_partial_dependence_problem_types(data_type, problem_type, X_y_binary, X

pipeline.fit(X, y)
part_dep = partial_dependence(pipeline, X, feature=0, grid_resolution=20)
assert list(part_dep.columns) == ["feature_values", "partial_dependence"]
assert len(part_dep["partial_dependence"]) == 20
assert len(part_dep["feature_values"]) == 20
check_partial_dependence_dataframe(pipeline, part_dep)
assert not part_dep.isnull().any(axis=None)
with pytest.raises(AttributeError):
pipeline._estimator_type
@@ -757,7 +769,7 @@ def test_partial_dependence_string_feature_name(logistic_regression_binary_pipel
pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
pipeline.fit(X, y)
part_dep = partial_dependence(pipeline, X, feature="mean radius", grid_resolution=20)
assert list(part_dep.columns) == ["feature_values", "partial_dependence"]
assert list(part_dep.columns) == ["feature_values", "partial_dependence", "class_label"]
assert len(part_dep["partial_dependence"]) == 20
assert len(part_dep["feature_values"]) == 20
assert not part_dep.isnull().any(axis=None)
@@ -796,29 +808,35 @@ class BaselineTestPipeline(BinaryClassificationPipeline):
partial_dependence(pipeline, X, feature=0, grid_resolution=20)


def test_partial_dependence_catboost(X_y_binary, has_minimal_dependencies):
@pytest.mark.parametrize("problem_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS])
def test_partial_dependence_catboost(problem_type, X_y_binary, X_y_multi, has_minimal_dependencies):
if not has_minimal_dependencies:
X, y = X_y_binary

class CatBoostTestPipeline(BinaryClassificationPipeline):
component_graph = ["CatBoost Classifier"]
if problem_type == ProblemTypes.BINARY:
X, y = X_y_binary
y_small = ['a', 'b', 'a']

class CatBoostTestPipeline(BinaryClassificationPipeline):
component_graph = ["CatBoost Classifier"]
else:
X, y = X_y_multi
y_small = ['a', 'b', 'c']

class CatBoostTestPipeline(MulticlassClassificationPipeline):
component_graph = ["CatBoost Classifier"]

pipeline = CatBoostTestPipeline({"CatBoost Classifier": {'thread_count': 1}})
pipeline.fit(X, y)
part_dep = partial_dependence(pipeline, X, feature=0, grid_resolution=20)
assert list(part_dep.columns) == ["feature_values", "partial_dependence"]
assert len(part_dep["partial_dependence"]) == 20
assert len(part_dep["feature_values"]) == 20
check_partial_dependence_dataframe(pipeline, part_dep)
assert not part_dep.isnull().all().all()

# test that CatBoost can natively handle non-numerical columns as the feature passed to partial_dependence
X = pd.DataFrame({'numeric': [1, 2, 3], 'also numeric': [2, 3, 4], 'string': ['a', 'b', 'c'], 'also string': ['c', 'b', 'a']})
y = ['a', 'b', 'a']
pipeline = CatBoostTestPipeline({"CatBoost Classifier": {'thread_count': 1}})
pipeline.fit(X, y)
pipeline.fit(X, y_small)
part_dep = partial_dependence(pipeline, X, feature='string')
assert list(part_dep.columns) == ["feature_values", "partial_dependence"]
assert len(part_dep["partial_dependence"]) == 3
assert len(part_dep["feature_values"]) == 3
check_partial_dependence_dataframe(pipeline, part_dep, grid_size=3)
assert not part_dep.isnull().all().all()


@@ -848,15 +866,11 @@ class XGBoostPipeline(MulticlassClassificationPipeline):
pipeline = XGBoostPipeline({'XGBoost Classifier': {'nthread': 1}})
pipeline.fit(X, y)
part_dep = partial_dependence(pipeline, X, feature="<[0]", grid_resolution=20)
assert list(part_dep.columns) == ["feature_values", "partial_dependence"]
assert len(part_dep["partial_dependence"]) == 20
assert len(part_dep["feature_values"]) == 20
check_partial_dependence_dataframe(pipeline, part_dep)
assert not part_dep.isnull().all().all()

part_dep = partial_dependence(pipeline, X, feature=1, grid_resolution=20)
assert list(part_dep.columns) == ["feature_values", "partial_dependence"]
assert len(part_dep["partial_dependence"]) == 20
assert len(part_dep["feature_values"]) == 20
check_partial_dependence_dataframe(pipeline, part_dep)
assert not part_dep.isnull().all().all()


@@ -890,12 +904,46 @@ def test_graph_partial_dependence(test_pipeline):
fig_dict = fig.to_dict()
assert fig_dict['layout']['title']['text'] == "Partial Dependence of 'mean radius'"
assert len(fig_dict['data']) == 1
assert fig_dict['data'][0]['name'] == "Partial Dependence"

part_dep_data = partial_dependence(clf, X, feature='mean radius', grid_resolution=20)
assert np.array_equal(fig_dict['data'][0]['x'], part_dep_data['feature_values'])
assert np.array_equal(fig_dict['data'][0]['y'], part_dep_data['partial_dependence'].values)


def test_graph_partial_dependence_multiclass(logistic_regression_multiclass_pipeline_class):
go = pytest.importorskip('plotly.graph_objects', reason='Skipping plotting test because plotly not installed')
X, y = load_wine()
pipeline = logistic_regression_multiclass_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
pipeline.fit(X, y)
fig = graph_partial_dependence(pipeline, X, feature='magnesium', grid_resolution=20)
assert isinstance(fig, go.Figure)
fig_dict = fig.to_dict()
assert len(fig_dict['data']) == len(pipeline.classes_)
for data, label in zip(fig_dict['data'], pipeline.classes_):
assert len(data['x']) == 20
assert len(data['y']) == 20
assert data['name'] == label

# Check that all subplot axes have the same range
for subplot_1_axis, subplot_2_axis in [('axis2', 'axis3'), ('axis2', 'axis4'), ('axis3', 'axis4')]:
for axis_type in ['x', 'y']:
assert fig_dict['layout'][axis_type + subplot_1_axis]['range'] == fig_dict['layout'][axis_type + subplot_2_axis]['range']

fig = graph_partial_dependence(pipeline, X, feature='magnesium', class_label='class_1', grid_resolution=20)

assert isinstance(fig, go.Figure)
fig_dict = fig.to_dict()
assert len(fig_dict['data']) == 1
assert len(fig_dict['data'][0]['x']) == 20
assert len(fig_dict['data'][0]['y']) == 20
assert fig_dict['data'][0]['name'] == 'class_1'

msg = "Class wine is not one of the classes the pipeline was fit on: class_0, class_1, class_2"
with pytest.raises(ValueError, match=msg):
graph_partial_dependence(pipeline, X, feature='alcohol', class_label='wine')


@patch('evalml.model_understanding.graphs.jupyter_check')
@patch('evalml.model_understanding.graphs.import_or_raise')
def test_jupyter_graph_check(import_check, jupyter_check, X_y_binary, X_y_regression, test_pipeline):