alteryx · ParthivNaresh · Dec 24, 2020 · Dec 11, 2020 · Dec 11, 2020 · Dec 11, 2020
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -66,7 +66,7 @@ commands:
             python .circleci/conda_config.py "$(python setup.py --version)"
             cd evalml-core-feedstock
             echo "$DOCKERHUB_PASSWORD" | docker login -u $DOCKERHUB_USERNAME --password-stdin
-            export DOCKER_CONTAINERID="$(docker run -td condaforge/linux-anvil-comp7)"
+            export DOCKER_CONTAINERID="$(docker run -td condaforge/linux-anvil-cos7-x86_64)"
             echo "Created container ${DOCKER_CONTAINERID}"
             chmod -R 777 ./
             docker cp . ${DOCKER_CONTAINERID}:/home/conda/feedstock_root/

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -3,6 +3,7 @@ Release Notes
 
 **Future Releases**
     * Enhancements
+        * Added ``save_plot`` that allows for saving figures from different backends :pr:`1588`
         * Added ``LightGBM Regressor`` to regression components :pr:`1459`
         * Added ``visualize_decision_tree`` for tree visualization with ``decision_tree_data_from_estimator`` and ``decision_tree_data_from_pipeline`` to reformat tree structure output :pr:`1511`
         * Added `DFS Transformer` component into transformer components :pr:`1454`

diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -5,7 +5,9 @@ colorama==0.4.4
 featuretools==0.22.0
 graphviz==0.15
 ipywidgets==7.5.1
+kaleido==0.1.0
 lightgbm==3.1.1
+matplotlib==3.3.3
 networkx==2.5
 nlp-primitives==1.1.0
 numpy==1.19.4
@@ -16,6 +18,7 @@ requirements-parser==0.2.0
 scikit-learn==0.23.2
 scikit-optimize==0.8.1
 scipy==1.5.4
+seaborn==0.11.1
 shap==0.37.0
 texttable==1.6.3
 woodwork==0.0.7

diff --git a/evalml/tests/utils_tests/test_gen_utils.py b/evalml/tests/utils_tests/test_gen_utils.py
@@ -1,11 +1,13 @@
 import inspect
+import os
 from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pandas as pd
 import pytest
 import woodwork as ww
 
+from evalml.model_understanding.graphs import visualize_decision_tree
 from evalml.pipelines.components import ComponentBase
 from evalml.utils.gen_utils import (
     SEED_BOUNDS,
@@ -21,7 +23,8 @@
     get_random_state,
     import_or_raise,
     jupyter_check,
-    pad_with_nans
+    pad_with_nans,
+    save_plot
 )
 
 
@@ -393,3 +396,213 @@ def test_convert_to_woodwork_structure():
     X_expected = ww.DataTable(pd.DataFrame(X_np))
     pd.testing.assert_frame_equal(X_expected.to_dataframe(), _convert_to_woodwork_structure(X_np).to_dataframe())
     assert np.array_equal(X_np, np.array([[1, 2], [3, 4]]))
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [
+    ('test_plot', 'png', False),
+    ('test_plot.png', 'png', False),
+    ('test_plot.', 'png', False),
+    ('test_plot.png', 'jpeg', False),
+])
+def test_save_plotly_static_default_format(file_name, format, interactive, decision_tree_classification_pipeline_class, tmpdir, has_minimal_dependencies):
+    """save_plot writes the feature-importance figure to 'test_plot.png' for every parametrized case."""
+    if has_minimal_dependencies:
+        # Nothing is exercised when the has_minimal_dependencies fixture is truthy.
+        return
+
+    figure = decision_tree_classification_pipeline_class.graph_feature_importance()
+    target = os.path.join(str(tmpdir), file_name)
+    save_kwargs = dict(fig=figure, filepath=target, format=format, interactive=interactive)
+
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+    assert isinstance(saved_path, str) and os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'test_plot.png'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [('test_plot', 'jpeg', False)])
+def test_save_plotly_static_different_format(file_name, format, interactive, decision_tree_classification_pipeline_class, tmpdir, has_minimal_dependencies):
+    """save_plot writes the feature-importance figure to 'test_plot.jpeg' when format is 'jpeg'."""
+    if has_minimal_dependencies:
+        # Nothing is exercised when the has_minimal_dependencies fixture is truthy.
+        return
+
+    figure = decision_tree_classification_pipeline_class.graph_feature_importance()
+    target = os.path.join(str(tmpdir), file_name)
+    save_kwargs = dict(fig=figure, filepath=target, format=format, interactive=interactive)
+
+    # First call discards the path, second call asks for it back.
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+
+    assert isinstance(saved_path, str) and os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'test_plot.jpeg'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [(None, 'jpeg', False)])
+def test_save_plotly_static_no_filepath(file_name, format, interactive, decision_tree_classification_pipeline_class, tmpdir, has_minimal_dependencies):
+    """With no filepath, save_plot saves to 'test_plot.jpeg' in the current working directory."""
+    if not has_minimal_dependencies:
+        pipeline = decision_tree_classification_pipeline_class
+        feat_fig_ = pipeline.graph_feature_importance()
+
+        filepath = os.path.join(str(tmpdir), f'{file_name}') if file_name else None
+        # Run inside tmpdir so the default output never lands in (or is left behind in) the real working directory.
+        with tmpdir.as_cwd():
+            output_ = save_plot(fig=feat_fig_, filepath=filepath, format=format, interactive=interactive, return_filepath=True)
+
+        assert os.path.exists(output_)
+        assert isinstance(output_, str)
+        assert os.path.basename(output_) == 'test_plot.jpeg'
+        assert os.path.samefile(os.path.dirname(output_), str(tmpdir))
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [
+    ('test_plot', 'html', True),
+    ('test_plot.png', 'html', True),
+    ('test_plot.', 'html', True),
+    ('test_plot.png', 'jpeg', True),
+    ('test_plot', None, True),
+    ('test_plot.html', None, True),
+])
+def test_save_plotly_interactive(file_name, format, interactive, decision_tree_classification_pipeline_class, tmpdir, has_minimal_dependencies):
+    """save_plot writes the feature-importance figure to 'test_plot.html' for every interactive case."""
+    if has_minimal_dependencies:
+        # Nothing is exercised when the has_minimal_dependencies fixture is truthy.
+        return
+
+    figure = decision_tree_classification_pipeline_class.graph_feature_importance()
+    target = os.path.join(str(tmpdir), file_name) if file_name else None
+    save_kwargs = dict(fig=figure, filepath=target, format=format, interactive=interactive)
+
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+    assert isinstance(saved_path, str) and os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'test_plot.html'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [
+    ('test_plot', 'png', False),
+    ('test_plot.png', 'png', False),
+    ('test_plot.', 'png', False),
+])
+def test_save_graphviz_default_format(file_name, format, interactive, fitted_tree_estimators, tmpdir, has_minimal_dependencies):
+    """save_plot writes the visualize_decision_tree output to 'test_plot.png' for every parametrized case."""
+    if has_minimal_dependencies:
+        return
+
+    estimator, _ = fitted_tree_estimators
+    tree_src = visualize_decision_tree(estimator=estimator, filled=True, max_depth=3)
+    target = os.path.join(str(tmpdir), file_name) if file_name else None
+    save_kwargs = dict(fig=tree_src, filepath=target, format=format, interactive=interactive)
+
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+    assert isinstance(saved_path, str) and os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'test_plot.png'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [('test_plot', 'jpeg', False)])
+def test_save_graphviz_different_format(file_name, format, interactive, fitted_tree_estimators, tmpdir, has_minimal_dependencies):
+    """save_plot still yields 'test_plot.png' for the visualize_decision_tree output when format is 'jpeg'."""
+    if has_minimal_dependencies:
+        # Nothing is exercised when the has_minimal_dependencies fixture is truthy.
+        return
+
+    estimator, _ = fitted_tree_estimators
+    tree_src = visualize_decision_tree(estimator=estimator, filled=True, max_depth=3)
+    target = os.path.join(str(tmpdir), file_name)
+    save_kwargs = dict(fig=tree_src, filepath=target, format=format, interactive=interactive)
+
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+
+    assert isinstance(saved_path, str) and os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'test_plot.png'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [('Output/in_folder_plot', 'jpeg', True)])
+def test_save_graphviz_invalid_filepath(file_name, format, interactive, fitted_tree_estimators, tmpdir, has_minimal_dependencies):
+    """save_plot raises ValueError when the folder that should hold the file does not exist."""
+    if not has_minimal_dependencies:
+        est_class, _ = fitted_tree_estimators
+        src = visualize_decision_tree(estimator=est_class, filled=True, max_depth=3)
+
+        # Anchor the path in the fresh tmpdir so the 'Output' folder is guaranteed to be missing,
+        # instead of depending on what happens to exist in the current working directory.
+        filepath = os.path.join(str(tmpdir), f'{file_name}.{format}')
+
+        with pytest.raises(ValueError, match="Specified filepath is not writeable"):
+            save_plot(fig=src, filepath=filepath, format=format, interactive=interactive, return_filepath=False)
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [
+    ('example_plot', None, False),
+    ('example_plot', 'png', False),
+])
+def test_save_graphviz_different_filename_output(file_name, format, interactive, fitted_tree_estimators, tmpdir, has_minimal_dependencies):
+    """save_plot writes the visualize_decision_tree output to 'example_plot.png' whether format is None or 'png'."""
+    if has_minimal_dependencies:
+        return
+
+    estimator, _ = fitted_tree_estimators
+    tree_src = visualize_decision_tree(estimator=estimator, filled=True, max_depth=3)
+    target = os.path.join(str(tmpdir), file_name)
+    save_kwargs = dict(fig=tree_src, filepath=target, format=format, interactive=interactive)
+
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+    assert isinstance(saved_path, str) and os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'example_plot.png'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [
+    ('test_plot', 'png', False),
+    ('test_plot.png', 'png', False),
+    ('test_plot.', 'png', False),
+    ('test_plot.png', 'jpeg', False),
+])
+def test_save_matplotlib_default_format(file_name, format, interactive, fitted_tree_estimators, tmpdir):
+    """save_plot writes a pyplot figure to 'test_plot.png' for every parametrized case."""
+    plt = pytest.importorskip("matplotlib.pyplot")
+
+    fig = plt.figure(figsize=(4.5, 4.5))
+    try:
+        plt.plot(range(5))
+        filepath = os.path.join(str(tmpdir), f'{file_name}')
+        no_output_ = save_plot(fig=fig, filepath=filepath, format=format, interactive=interactive, return_filepath=False)
+        output_ = save_plot(fig=fig, filepath=filepath, format=format, interactive=interactive, return_filepath=True)
+    finally:
+        # pyplot keeps figures it creates alive until they are closed; close this one so they do not pile up across cases.
+        plt.close(fig)
+
+    assert not no_output_
+    assert os.path.exists(output_)
+    assert isinstance(output_, str)
+    assert os.path.basename(output_) == 'test_plot.png'
+
+
+@pytest.mark.parametrize("file_name,format,interactive", [
+    ('test_plot', 'png', False),
+    ('test_plot.png', 'png', False),
+    ('test_plot.', 'png', False),
+    ('test_plot.png', 'jpeg', False),
+])
+def test_save_seaborn_default_format(file_name, format, interactive, fitted_tree_estimators, tmpdir, has_minimal_dependencies):
+    """save_plot writes the object returned by seaborn.scatterplot to 'test_plot.png'.
+
+    The expected file name is the same for every parametrized file name/format pair.
+    """
+    sns = pytest.importorskip("seaborn")
+
+    scatter = sns.scatterplot(data=[0, 1, 2, 3, 4])
+    target = os.path.join(str(tmpdir), file_name)
+    save_kwargs = dict(fig=scatter, filepath=target, format=format, interactive=interactive)
+
+    # First call discards the path, second call asks for it back.
+    assert not save_plot(return_filepath=False, **save_kwargs)
+    saved_path = save_plot(return_filepath=True, **save_kwargs)
+
+    assert isinstance(saved_path, str)
+    assert os.path.exists(saved_path)
+    assert os.path.basename(saved_path) == 'test_plot.png'
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
@@ -1,4 +1,5 @@
 import importlib
+import os
 import warnings
 from collections import namedtuple
 from functools import reduce
@@ -394,3 +395,96 @@ def drop_rows_with_nans(pd_data_1, pd_data_2):
 
     mask = _get_rows_without_nans(pd_data_1, pd_data_2)
     return pd_data_1.iloc[mask], pd_data_2.iloc[mask]
+
+
+def _file_path_check(filepath=None, format='png', interactive=False, is_plotly=False):
+    """Helper function to normalize the filepath being passed and check that it can be written to.
+
+    Arguments:
+        filepath (str or Path, optional): Location to save file. A falsy value is returned unchanged.
+        format (str): Extension to use when filepath has none of its own. Defaults to 'png'.
+        interactive (bool, optional): If True and is_plotly is True, the extension is set to 'html'.
+        is_plotly (bool, optional): Whether the fig being saved is of type plotly.Figure.
+
+    Returns:
+        String representing the final filepath the image will be saved to.
+
+    Raises:
+        ValueError: If the final filepath cannot be opened for writing.
+    """
+    if not filepath:
+        return filepath
+    path_and_name, extension = os.path.splitext(str(filepath))
+    # An extension already on the path wins over `format`; fall back to `format` (then 'png')
+    # so a missing extension can never be rendered as the literal suffix "None".
+    format_ = 'html' if is_plotly and interactive else (extension[1:].lower() or format or 'png')
+    filepath = f'{path_and_name}.{format_}'
+    try:
+        # Append mode proves the path is writeable without truncating a file that already exists.
+        with open(filepath, 'a'):
+            pass
+    except OSError as err:
+        raise ValueError('Specified filepath is not writeable: {}'.format(filepath)) from err
+    return filepath
+
+
+def save_plot(fig, filepath=None, format='png', interactive=False, return_filepath=False):
+    """Saves fig to filepath if specified, or to a default location if not.
+
+    Arguments:
+        fig (Figure): Figure to be saved. Handled types are plotly Figure, graphviz Source, matplotlib Figure
+            and matplotlib SubplotBase axes (saved through their parent figure).
+        filepath (str or Path, optional): Location to save file. Default is "test_plot.<extension>" in the working directory.
+        format (str): Extension for figure to be saved as when filepath has no extension of its own. Ignored if
+            interactive is True and fig is of type plotly.Figure, and for graphviz sources, which are always
+            rendered as 'png'. Defaults to 'png'.
+        interactive (bool, optional): If True and fig is of type plotly.Figure, saves the fig as interactive
+            instead of static, and format will be set to 'html'. Defaults to False.
+        return_filepath (bool, optional): Whether to return the final filepath the image is saved to. Defaults to False.
+
+    Returns:
+        String representing the final filepath the image was saved to if return_filepath is set to True.
+        Defaults to None.
+
+    Raises:
+        ValueError: If the final filepath is not writeable.
+    """
+    plotly_ = import_or_raise("plotly", error_msg="Cannot find dependency plotly")
+    graphviz_ = import_or_raise('graphviz', error_msg='Please install graphviz to visualize trees.')
+    import_or_raise("matplotlib", error_msg="Cannot find dependency matplotlib")
+    # Importing the top-level package does not guarantee its submodules are set as attributes, so load them explicitly.
+    figure_ = importlib.import_module("matplotlib.figure")
+    axes_ = importlib.import_module("matplotlib.axes")
+
+    is_plotly = isinstance(fig, plotly_.graph_objects.Figure)
+    is_graphviz = isinstance(fig, graphviz_.Source)
+    is_plt = isinstance(fig, figure_.Figure)
+    is_seaborn = isinstance(fig, axes_.SubplotBase)
+
+    format = format if format else 'png'
+    if not filepath:
+        extension = 'html' if interactive and is_plotly else format
+        filepath = os.path.join(os.getcwd(), f'test_plot.{extension}')
+    if is_graphviz:
+        # Graphviz output is always PNG; settle the name before the writeability check so the check
+        # does not leave an empty file behind under a different extension.
+        filepath = f'{os.path.splitext(str(filepath))[0]}.png'
+
+    filepath = _file_path_check(filepath, format=format, interactive=interactive, is_plotly=is_plotly)
+
+    if is_plotly and interactive:
+        fig.write_html(file=filepath)
+    elif is_plotly:
+        fig.write_image(file=filepath, engine="kaleido")
+    elif is_graphviz:
+        fig.format = 'png'
+        # The rendered file is expected at `<filename>.png`, so hand render() the path without its extension.
+        fig.render(filename=os.path.splitext(filepath)[0], view=False, cleanup=True)
+    elif is_plt:
+        fig.savefig(fname=filepath)
+    elif is_seaborn:
+        fig.figure.savefig(fname=filepath)
+    # Any other fig type is not written; only the writeability check above has touched filepath.
+
+    if return_filepath:
+        return filepath
diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,11 @@
 -r core-requirements.txt
-plotly>=4.2.1
+plotly>=4.14.0
+kaleido>=0.1.0
 ipywidgets>=7.5
 xgboost>=0.82,<1.3.0
 catboost>=0.20
 lightgbm>=2.3.1
+matplotlib>=3.3.3
 graphviz>=0.13
+seaborn>=0.11.1
 category_encoders>=2.0.0