Add ROC/confusion graphing methods #720

Merged: 19 commits, May 8, 2020
Changes from 14 commits
39 changes: 39 additions & 0 deletions docs/source/automl/search_results.ipynb
@@ -129,6 +129,45 @@
"pipeline.graph_feature_importance()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ROC Curve\n",
"\n",
"For binary classification, you can view the ROC curve of a classifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get the predicted probabilities associated with the \"true\" label\n",
"y_pred_proba = pipeline.predict_proba(X)[:, 1]\n",
"evalml.pipelines.graph_utils.graph_roc_curve(y, y_pred_proba)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Confusion Matrix\n",
"\n",
"For binary or multiclass classification, you can view a confusion matrix of the classifier's predictions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_pred = pipeline.predict(X)\n",
"evalml.pipelines.graph_utils.graph_confusion_matrix(y, y_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
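Taken together, the two new cells assume a fitted binary classification pipeline. A minimal end-to-end sketch of the documented workflow, assuming `pipeline` is any fitted evalml binary classification pipeline and `X`, `y` are the data to evaluate on (the variable names and the plotly calls on the returned figures are illustrative, not part of this diff):

import evalml

# ROC curve: pass the predicted probability of the "true" (positive) label
y_pred_proba = pipeline.predict_proba(X)[:, 1]
roc_fig = evalml.pipelines.graph_utils.graph_roc_curve(y, y_pred_proba)
roc_fig.show()  # plotly figures render inline in a notebook or open in a browser

# Confusion matrix: works on hard predictions, binary or multiclass
y_pred = pipeline.predict(X)
cm_fig = evalml.pipelines.graph_utils.graph_confusion_matrix(y, y_pred)
cm_fig.write_html('confusion_matrix.html')  # standard plotly method to save the figure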
7 changes: 4 additions & 3 deletions docs/source/changelog.rst
@@ -58,8 +58,9 @@ Changelog
* Standardize `import_or_raise` error messages :pr:`683`
* Updated argument order of objectives to align with sklearn's :pr:`698`
* Renamed `pipeline.feature_importance_graph` to `pipeline.graph_feature_importances` :pr:`700`
* Moved ROC and confusion matrix methods to `evalml.pipelines.plot_utils` :pr:`704`
* Moved ROC and confusion matrix methods to `evalml.pipelines.graph_utils` :pr:`704`
* Renamed `MultiClassificationObjective` to `MulticlassClassificationObjective`, to align with pipeline naming scheme :pr:`715`
* Added util methods to graph ROC and confusion matrix :pr:`720`
* Documentation Changes
* Fixed some sphinx warnings :pr:`593`
* Fixed docstring for AutoClassificationSearch with correct command :pr:`599`
@@ -98,8 +99,8 @@ Changelog
* ``fit()`` and ``predict()`` now use an optional ``objective`` parameter, which is only used in binary classification pipelines to fit for a specific objective.
* ``score()`` will now use a required ``objectives`` parameter that is used to determine all the objectives to score on. This differs from the previous behavior, where the pipeline's objective was scored on regardless.
* ``score()`` will now return one dictionary of all objective scores.
* ``ROC`` and ``ConfusionMatrix`` plot methods via ``Auto(*).plot`` have been removed by :pr:`615` and are replaced by ``roc_curve`` and ``confusion_matrix`` in `evamlm.pipelines.plot_utils`` in :pr:`704`
* ``normalize_confusion_matrix`` has been moved to ``evalml.pipelines.plot_utils`` :pr:`704`
* ``ROC`` and ``ConfusionMatrix`` plot methods via ``Auto(*).plot`` have been removed by :pr:`615` and are replaced by util methods in ``evalml.pipelines.graph_utils`` :pr:`704` :pr:`720`
* ``normalize_confusion_matrix`` has been moved to ``evalml.pipelines.graph_utils`` :pr:`704`
* Pipelines ``_name`` field changed to ``custom_name``
* Pipelines ``supported_problem_types`` field is removed because it is no longer necessary :pr:`678`
* Updated argument order of objectives' `objective_function` to align with sklearn :pr:`698`
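As a rough sketch of the `fit`/`predict`/`score` changes described above, assuming a fitted binary classification pipeline and data `X`, `y` (the objective names here are examples only):

# score() takes an explicit list of objectives and returns one dict of scores
scores = pipeline.score(X, y, objectives=['F1', 'AUC'])

# fit() and predict() accept an optional objective; per the note above,
# only binary classification pipelines use it, to fit for that specific objective
pipeline.fit(X, y, objective='F1')
y_pred = pipeline.predict(X, objective='F1')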
2 changes: 1 addition & 1 deletion evalml/pipelines/__init__.py
@@ -47,7 +47,7 @@
get_pipelines,
list_model_families
)
from .plot_utils import (
from .graph_utils import (
roc_curve,
confusion_matrix,
normalize_confusion_matrix
143 changes: 143 additions & 0 deletions evalml/pipelines/graph_utils.py
@@ -0,0 +1,143 @@
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import auc as sklearn_auc
from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
from sklearn.metrics import roc_curve as sklearn_roc_curve
from sklearn.utils.multiclass import unique_labels

from evalml.utils import import_or_raise


def roc_curve(y_true, y_pred_proba):
"""
Given labels and binary classifier predicted probabilities, compute and return the data representing a Receiver Operating Characteristic (ROC) curve.

Arguments:
y_true (pd.Series or np.array): true binary labels.
y_pred_proba (pd.Series or np.array): predictions from a binary classifier, before thresholding has been applied. Note this should be the predicted probability for the "true" label.

Returns:
dict: Dictionary containing metrics used to generate an ROC plot, with the following keys:
* `fpr_rates`: False positive rates.
* `tpr_rates`: True positive rates.
* `thresholds`: Threshold values used to produce each pair of true/false positive rates.
* `auc_score`: The area under the ROC curve.
"""
fpr_rates, tpr_rates, thresholds = sklearn_roc_curve(y_true, y_pred_proba)
auc_score = sklearn_auc(fpr_rates, tpr_rates)
return {'fpr_rates': fpr_rates,
'tpr_rates': tpr_rates,
'thresholds': thresholds,
'auc_score': auc_score}
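For reference, a quick sketch of what the returned dictionary looks like in use, with toy inputs (values illustrative only):

roc_data = roc_curve(y_true=[0, 0, 1, 1], y_pred_proba=[0.1, 0.4, 0.35, 0.8])
roc_data['auc_score']                         # scalar area under the curve
roc_data['fpr_rates'], roc_data['tpr_rates']  # arrays of points, ready to plot with any library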


def graph_roc_curve(y_true, y_pred_proba, title_addition=None):
"""Generate and display a Receiver Operating Characteristic (ROC) plot.

Arguments:
y_true (pd.Series or np.array): true binary labels.
y_pred_proba (pd.Series or np.array): predictions from a binary classifier, before thresholding has been applied. Note this should be the predicted probability for the "true" label.
title_addition (str or None): if not None, append to plot title. Default None.

Returns:
plotly.Figure representing the ROC plot generated
"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
roc_curve_data = roc_curve(y_true, y_pred_proba)
title = 'Receiver Operating Characteristic{}'.format('' if title_addition is None else (' ' + title_addition))
layout = _go.Layout(title={'text': title},
xaxis={'title': 'False Positive Rate', 'range': [-0.05, 1.05]},
yaxis={'title': 'True Positive Rate', 'range': [-0.05, 1.05]})
data = []
data.append(_go.Scatter(x=roc_curve_data['fpr_rates'], y=roc_curve_data['tpr_rates'],
name='ROC (AUC {:.6f})'.format(roc_curve_data['auc_score']),
line=dict(width=3)))
data.append(_go.Scatter(x=[0, 1], y=[0, 1],
name='Trivial Model (AUC 0.5)',
line=dict(dash='dash')))
return _go.Figure(layout=layout, data=data)
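Since graph_roc_curve returns the figure rather than displaying it, a caller can render or persist it with standard plotly Figure methods; for example, assuming `y_true` and `y_pred_proba` as in the docstring:

fig = graph_roc_curve(y_true, y_pred_proba, title_addition='(validation split)')
fig.show()                        # render in a notebook or browser
fig.write_html('roc_curve.html')  # or save to disk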


def confusion_matrix(y_true, y_predicted, normalize_method='true'):
"""Confusion matrix for binary and multiclass classification.

Arguments:
y_true (pd.Series or np.array): true labels.
y_predicted (pd.Series or np.array): predictions from a classifier.
normalize_method ({'true', 'pred', 'all'}): Normalization method. Supported options are: 'true' to normalize by row, 'pred' to normalize by column, or 'all' to normalize by all values. Defaults to 'true'.

Returns:
np.array: confusion matrix
"""
labels = unique_labels(y_true, y_predicted)
conf_mat = sklearn_confusion_matrix(y_true, y_predicted)
conf_mat = pd.DataFrame(conf_mat, columns=labels)
if normalize_method is not None:
return normalize_confusion_matrix(conf_mat, normalize_method=normalize_method)
return conf_mat
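A small sketch of the normalization options, with made-up labels (behavior as documented above):

y_true = ['cat', 'dog', 'dog', 'bird']
y_pred = ['cat', 'cat', 'dog', 'bird']
confusion_matrix(y_true, y_pred, normalize_method=None)    # raw counts, columns labeled by class
confusion_matrix(y_true, y_pred, normalize_method='true')  # each row sums to 1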


def normalize_confusion_matrix(conf_mat, normalize_method='true'):
"""Normalizes a confusion matrix.

Arguments:
conf_mat (pd.DataFrame or np.array): confusion matrix to normalize.
normalize_method ({'true', 'pred', 'all'}): Normalization method. Supported options are: 'true' to normalize by row, 'pred' to normalize by column, or 'all' to normalize by all values. Defaults to 'true'.

Returns:
A normalized version of the input confusion matrix.
"""
with warnings.catch_warnings(record=True) as w:
if normalize_method == 'true':
conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
elif normalize_method == 'pred':
conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=0)
elif normalize_method == 'all':
conf_mat = conf_mat.astype('float') / conf_mat.sum().sum()
else:
raise ValueError('Invalid value provided for "normalize_method": {}'.format(normalize_method))
if w and "invalid value encountered in" in str(w[0].message):
raise ValueError("Sum of given axis is 0 and normalization is not possible. Please select another option.")
return conf_mat
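For reference, a tiny sketch of the three options on a hand-built numpy matrix (values illustrative):

import numpy as np
conf = np.array([[10, 0],
                 [4, 6]], dtype=float)
normalize_confusion_matrix(conf, 'true')  # divide each row by its row total
normalize_confusion_matrix(conf, 'pred')  # divide each column by its column total
normalize_confusion_matrix(conf, 'all')   # divide every cell by the grand total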


def graph_confusion_matrix(y_true, y_pred, normalize_method='true', title_addition=None):
"""Generate and display a confusion matrix plot.

If `normalize_method` is set, hover text will show raw count, otherwise hover text will show count normalized with method 'true'.

Arguments:
y_true (pd.Series or np.array): true labels.
y_pred (pd.Series or np.array): predictions from a classifier.
normalize_method ({'true', 'pred', 'all'}): Normalization method. Supported options are: 'true' to normalize by row, 'pred' to normalize by column, or 'all' to normalize by all values. Defaults to 'true'.
title_addition (str or None): if not None, append to plot title. Default None.

Returns:
plotly.Figure representing the confusion matrix plot generated
"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
conf_mat = confusion_matrix(y_true, y_pred, normalize_method=None)
conf_mat_normalized = confusion_matrix(y_true, y_pred, normalize_method=normalize_method or 'true')
labels = conf_mat.columns

title = 'Confusion matrix{}{}'.format(
'' if title_addition is None else (' ' + title_addition),
'' if normalize_method is None else (', normalized using method "' + normalize_method + '"'))
z_data, custom_data = (conf_mat, conf_mat_normalized) if normalize_method is None else (conf_mat_normalized, conf_mat)
primary_heading, secondary_heading = ('Raw', 'Normalized') if normalize_method is None else ('Normalized', 'Raw')
hover_text = '<br><b>' + primary_heading + ' Count</b>: %{z}<br><b>' + secondary_heading + ' Count</b>: %{customdata} <br>'
# the "<extra> tags at the end are necessary to remove unwanted trace info
hover_template = '<b>True</b>: %{y}<br><b>Predicted</b>: %{x}' + hover_text + '<extra></extra>'
layout = _go.Layout(title={'text': title},
xaxis={'title': 'Predicted Label', 'type': 'category', 'tickvals': labels},
yaxis={'title': 'True Label', 'type': 'category', 'tickvals': labels})
fig = _go.Figure(data=_go.Heatmap(x=labels, y=labels, z=z_data,
customdata=custom_data,
hovertemplate=hover_template,
colorscale='Blues'),
layout=layout)
# plotly Heatmap y axis defaults to the reverse of what we want: https://community.plotly.com/t/heatmap-y-axis-is-reversed-by-default-going-against-standard-convention-for-matrices/32180
fig.update_yaxes(autorange="reversed")

Contributor Author
@angela97lin : idk if you remember, but a couple weeks ago I mentioned I was having trouble getting the confusion matrix to plot out in the right order. Turns out that's because the y axis on plotly.Heatmap is the reverse of the input data by default! As you can see in the link I posted in the code comment above, they did that because in the field of image processing, images are typically stored in matrices with the y axis inverted.

Long story short, this inversion fixes the problem without the need for us to invert the labels or data :) lmk if you spot anything funky with this code.

return fig
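A short usage sketch of the two display modes described in the docstring, with `y_true` and `y_pred` as in the earlier confusion matrix example:

# default: cells show row-normalized values, hover text shows the raw counts
fig = graph_confusion_matrix(y_true, y_pred)
fig.show()

# with normalize_method=None the cells show raw counts and the hover text
# shows the counts normalized by the 'true' (row) method
graph_confusion_matrix(y_true, y_pred, normalize_method=None, title_addition='(holdout set)')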
62 changes: 0 additions & 62 deletions evalml/pipelines/plot_utils.py

This file was deleted.