Update Model_Understanding documentation for plotly use on Jupyter Lab #1108

Merged
merged 27 commits on Sep 3, 2020
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -45,6 +45,7 @@ Release Notes
* Documentation Changes
* Update setup.py URL to point to the github repo :pr:`1037`
* Added tutorial for using the cost-benefit matrix objective :pr:`1088`
* Updated `model_understanding.ipynb` to include documentation for using plotly on Jupyter Lab :pr:`1108`
* Testing Changes
* Refactor CircleCI tests to use matrix jobs (:pr:`1043`)
* Added a test to check that all test directories are included in evalml package :pr:`1054`
9 changes: 7 additions & 2 deletions docs/source/user_guide/model_understanding.ipynb
@@ -8,7 +8,12 @@
"\n",
"Simply examining a model's performance metrics is not enough to select a model and promote it for use in a production setting. While developing an ML algorithm, it is important to understand how the model behaves on the data, to examine the key factors influencing its predictions and to consider where it may be deficient. Determination of what \"success\" may mean for an ML project depends first and foremost on the user's domain expertise.\n",
"\n",
"EvalML includes a variety of tools for understanding models, from graphing utilities to methods for explaining predictions."
"EvalML includes a variety of tools for understanding models, from graphing utilities to methods for explaining predictions.\n",
"\n",
"\n",
"** Graphing methods on Jupyter Notebook and Jupyter Lab require [ipywidgets](https://ipywidgets.readthedocs.io/en/latest/user_install.html) to be installed.\n",
bchen1116 marked this conversation as resolved.
Show resolved Hide resolved
"\n",
"** If graphing on Jupyter Lab, [jupyterlab-plotly](https://plotly.com/python/getting-started/#jupyterlab-support-python-35) required. To download this, make sure you have [npm](https://nodejs.org/en/download/) installed."
]
},
{
@@ -384,4 +389,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
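For reference, a minimal sketch of the workflow the new notebook note describes. The install commands in the comments and the toy labels are illustrative assumptions; graph_confusion_matrix is the evalml graphing utility touched in the diff below.

# Prerequisites (per the notebook note above; exact commands may vary by setup):
#   pip install ipywidgets
#   jupyter labextension install jupyterlab-plotly   # Jupyter Lab only, requires npm
import numpy as np
from evalml.model_understanding.graphs import graph_confusion_matrix

y_true = np.array([0, 1, 1, 0, 1, 0, 1, 1])  # toy labels for illustration
y_pred = np.array([0, 1, 0, 0, 1, 1, 1, 1])

fig = graph_confusion_matrix(y_true, y_pred)  # returns a plotly Figure
fig.show()  # renders inline when the notebook frontend supports plotly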
17 changes: 16 additions & 1 deletion evalml/model_understanding/graphs.py
@@ -18,7 +18,7 @@
from evalml.model_family import ModelFamily
from evalml.objectives.utils import get_objective
from evalml.problem_types import ProblemTypes
from evalml.utils import import_or_raise
from evalml.utils import import_or_raise, import_or_warn, jupyter_check


def confusion_matrix(y_true, y_predicted, normalize_method='true'):
@@ -105,6 +105,8 @@ def graph_precision_recall_curve(y_true, y_pred_proba, title_addition=None):
plotly.Figure representing the precision-recall plot generated
"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

if isinstance(y_true, pd.Series):
y_true = y_true.to_numpy()
@@ -160,6 +162,8 @@ def graph_roc_curve(y_true, y_pred_proba, custom_class_names=None, title_additio
plotly.Figure representing the ROC plot generated
"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

if isinstance(y_true, pd.Series):
y_true = y_true.to_numpy()
@@ -215,6 +219,8 @@ def graph_confusion_matrix(y_true, y_pred, normalize_method='true', title_additi
plotly.Figure representing the confusion matrix plot generated
"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

if isinstance(y_true, pd.Series):
y_true = y_true.to_numpy()
@@ -293,6 +299,9 @@ def graph_permutation_importance(pipeline, X, y, objective, importance_threshold
plotly.Figure, a bar graph showing features and their respective permutation importance.
"""
go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

perm_importance = calculate_permutation_importance(pipeline, X, y, objective)
perm_importance['importance'] = perm_importance['importance']

@@ -373,6 +382,9 @@ def graph_binary_objective_vs_threshold(pipeline, X, y, objective, steps=100):

"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

objective = get_objective(objective)
df = binary_objective_vs_threshold(pipeline, X, y, objective, steps)
title = f'{objective.name} Scores vs. Thresholds'
@@ -430,6 +442,9 @@ def graph_partial_dependence(pipeline, X, feature, grid_resolution=100):

"""
_go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

part_dep = partial_dependence(pipeline, X, feature=feature, grid_resolution=grid_resolution)
feature_name = str(feature)
title = f"Partial Dependence of '{feature_name}'"
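A sketch (not part of the diff) of the guard each graphing function above now applies: plotly remains a hard requirement via import_or_raise, while ipywidgets is only warned about, and only when an IPython frontend is detected. graph_something is a hypothetical function used purely for illustration.

from evalml.utils import import_or_raise, import_or_warn, jupyter_check

def graph_something(values):
    _go = import_or_raise("plotly.graph_objects",
                          error_msg="Cannot find dependency plotly.graph_objects")
    if jupyter_check():               # True only under IPython/Jupyter
        import_or_warn("ipywidgets")  # emits a UserWarning instead of raising
    return _go.Figure(data=[_go.Scatter(y=values)])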
4 changes: 4 additions & 0 deletions evalml/pipelines/pipeline_base.py
@@ -24,6 +24,8 @@
get_logger,
get_random_state,
import_or_raise,
import_or_warn,
jupyter_check,
log_subtitle,
log_title
)
@@ -398,6 +400,8 @@ def graph_feature_importance(self, importance_threshold=0):
plotly.Figure, a bar graph showing features and their corresponding importance
"""
go = import_or_raise("plotly.graph_objects", error_msg="Cannot find dependency plotly.graph_objects")
if jupyter_check():
import_or_warn("ipywidgets")

feat_imp = self.feature_importance
feat_imp['importance'] = abs(feat_imp['importance'])
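A usage sketch for the guarded method above. The pipeline definition follows the subclass-style pipeline API of this evalml release and is an assumption for illustration; any fitted evalml pipeline should behave the same way.

from evalml.demos import load_breast_cancer
from evalml.pipelines import BinaryClassificationPipeline

# Hypothetical pipeline definition, assuming these component names exist in this release
class MyPipeline(BinaryClassificationPipeline):
    component_graph = ['Simple Imputer', 'Logistic Regression Classifier']

X, y = load_breast_cancer()
pipeline = MyPipeline(parameters={})
pipeline.fit(X, y)
fig = pipeline.graph_feature_importance()  # in Jupyter, a missing ipywidgets install now surfaces as a warning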
49 changes: 49 additions & 0 deletions evalml/tests/model_understanding_tests/test_graphs.py
@@ -619,3 +619,52 @@ def test_graph_partial_dependence(test_pipeline):
part_dep_data = partial_dependence(clf, X, feature='mean radius', grid_resolution=20)
assert np.array_equal(fig_dict['data'][0]['x'], part_dep_data['feature_values'])
assert np.array_equal(fig_dict['data'][0]['y'], part_dep_data['partial_dependence'].values)


@patch('evalml.model_understanding.graphs.jupyter_check')
@patch('evalml.model_understanding.graphs.import_or_warn')
def test_jupyter_graph_check(import_check, jupyter_check, X_y_binary, test_pipeline):
pytest.importorskip('plotly.graph_objects', reason='Skipping plotting test because plotly not installed')
X, y = X_y_binary
clf = test_pipeline
clf.fit(X, y)
cbm = CostBenefitMatrix(true_positive=1, true_negative=-1, false_positive=-7, false_negative=-2)
jupyter_check.return_value = False
with pytest.warns(None) as graph_valid:
graph_permutation_importance(test_pipeline, X, y, "log_loss_binary")
assert len(graph_valid) == 0
assert not import_check.called
with pytest.warns(None) as graph_valid:
graph_confusion_matrix(y, y)
assert len(graph_valid) == 0
assert not import_check.called

jupyter_check.return_value = True
with pytest.warns(None) as graph_valid:
graph_partial_dependence(clf, X, feature=0, grid_resolution=20)
assert len(graph_valid) == 0
import_check.assert_called_with('ipywidgets')
with pytest.warns(None) as graph_valid:
graph_binary_objective_vs_threshold(test_pipeline, X, y, cbm)
assert len(graph_valid) == 0
import_check.assert_called_with('ipywidgets')
with pytest.warns(None) as graph_valid:
rs = np.random.RandomState(42)
y_pred_proba = y * rs.random(y.shape)
graph_precision_recall_curve(y, y_pred_proba)
assert len(graph_valid) == 0
import_check.assert_called_with('ipywidgets')
with pytest.warns(None) as graph_valid:
graph_permutation_importance(test_pipeline, X, y, "log_loss_binary")
assert len(graph_valid) == 0
import_check.assert_called_with('ipywidgets')
with pytest.warns(None) as graph_valid:
graph_confusion_matrix(y, y)
assert len(graph_valid) == 0
import_check.assert_called_with('ipywidgets')
with pytest.warns(None) as graph_valid:
rs = np.random.RandomState(42)
y_pred_proba = y * rs.random(y.shape)
graph_roc_curve(y, y_pred_proba)
assert len(graph_valid) == 0
import_check.assert_called_with('ipywidgets')
19 changes: 19 additions & 0 deletions evalml/tests/pipeline_tests/test_graphs.py
@@ -113,3 +113,22 @@ def test_graph_feature_importance_threshold(X_y_binary, test_pipeline):

data = figure.data[0]
assert (np.all(data['x'] >= 0.5))


@patch('evalml.pipelines.pipeline_base.jupyter_check')
@patch('evalml.pipelines.pipeline_base.import_or_warn')
def test_jupyter_graph_check(import_check, jupyter_check, X_y_binary, test_pipeline):
pytest.importorskip('plotly.graph_objects', reason='Skipping plotting test because plotly not installed')
X, y = X_y_binary
clf = test_pipeline
clf.fit(X, y)
jupyter_check.return_value = False
with pytest.warns(None) as graph_valid:
clf.graph_feature_importance()
assert len(graph_valid) == 0

jupyter_check.return_value = True
import_check.return_value = True
with pytest.warns(None) as graph_valid:
clf.graph_feature_importance()
import_check.assert_called_with('ipywidgets')
21 changes: 20 additions & 1 deletion evalml/tests/utils_tests/test_gen_utils.py
@@ -12,7 +12,8 @@
get_importable_subclasses,
get_random_seed,
get_random_state,
import_or_raise
import_or_raise,
import_or_warn
)


@@ -172,3 +173,21 @@ class ChildClass(ComponentBase):
pass

assert ChildClass not in get_importable_subclasses(ComponentBase)


@patch('importlib.import_module')
def test_import_or_warn_errors(dummy_importlib):
def _mock_import_function(library_str):
if library_str == "_evalml":
raise ImportError("Mock ImportError executed!")
if library_str == "attr_error_lib":
raise Exception("Mock Exception executed!")

dummy_importlib.side_effect = _mock_import_function

with pytest.warns(UserWarning, match="Missing optional dependency '_evalml'"):
import_or_warn("_evalml")
with pytest.warns(UserWarning, match="Missing optional dependency '_evalml'. Please use pip to install _evalml. Additional error message"):
import_or_warn("_evalml", "Additional error message")
with pytest.warns(UserWarning, match="An exception occurred while trying to import `attr_error_lib`: Mock Exception executed!"):
import_or_warn("attr_error_lib")
2 changes: 1 addition & 1 deletion evalml/utils/__init__.py
@@ -1,4 +1,4 @@
# flake8:noqa
from .logger import get_logger, log_subtitle, log_title
from .gen_utils import classproperty, import_or_raise, convert_to_seconds, get_random_state, get_random_seed, SEED_BOUNDS
from .gen_utils import classproperty, import_or_raise, convert_to_seconds, get_random_state, get_random_seed, SEED_BOUNDS, jupyter_check, import_or_warn
from .cli_utils import print_info, get_evalml_root, get_installed_packages, get_sys_info, print_sys_info, print_deps
37 changes: 37 additions & 0 deletions evalml/utils/gen_utils.py
@@ -1,4 +1,5 @@
import importlib
import warnings
from collections import namedtuple

import numpy as np
@@ -181,3 +182,39 @@ def get_importable_subclasses(base_class, used_in_automl=True):
classes = [cls for cls in classes if cls.__name__ not in _not_used_in_automl]

return classes


def jupyter_check():
"""Get whether or not the code is being run in a Ipython environment (such as Jupyter Notebook or Jupyter Lab)

Arguments:
None

Returns:
Boolean: True if running in an IPython environment, False otherwise
"""
try:
get_ipython()
return True
Reviewer comment (Contributor): I think we can get to a point where this is the only line that's not covered by unit tests.

except NameError:
return False


def import_or_warn(library, error_msg=None):
"""Attempts to import the requested library by name.
If the import fails, a UserWarning is shown instead of raising an ImportError.

Arguments:
library (str): the name of the library
error_msg (str): additional message appended to the warning if the import fails
"""
try:
return importlib.import_module(library)
except ImportError:
if error_msg is None:
error_msg = ""
msg = (f"Missing optional dependency '{library}'. Please use pip to install {library}. {error_msg}")
warnings.warn(msg)
except Exception as ex:
msg = (f"An exception occurred while trying to import `{library}`: {str(ex)}")
warnings.warn(msg)
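A quick illustration of how the two new helpers compose. '_some_optional_lib' is a deliberately nonexistent module name, so import_or_warn emits a UserWarning (with the extra message appended) rather than raising.

from evalml.utils import import_or_warn, jupyter_check

if jupyter_check():  # True only inside an IPython frontend such as Jupyter
    import_or_warn("_some_optional_lib",
                   "Interactive graphing may not render without it.")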