Refactor tests to avoid using importorskip (#3126)
* Refactor test files

* Add to release notes

* Update comments in bash script

* Add fixtures for test modules
freddyaboulton committed Dec 7, 2021
1 parent 30b2a6a commit 4bfe5f4
Showing 34 changed files with 367 additions and 481 deletions.
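
The diffs below replace per-test pytest.importorskip calls with shared fixtures and a noncore_dependency marker, so tests that rely on optional packages (plotly, imblearn, xgboost, catboost, sktime) declare the dependency once and are collected normally. The conftest.py changes that define those shared pieces are not among the files shown in this excerpt, so the following is only a minimal sketch, assuming a fixture such as go simply imports the optional package on demand; the fixture name matches the test signatures in the diffs, but its body here is illustrative.

# Hypothetical sketch of a shared fixture in conftest.py; the real definitions
# added by this PR are not included in the excerpt below.
import pytest


@pytest.fixture
def go():
    # Import the optional plotly dependency lazily so only tests that request
    # this fixture need plotly installed.
    import plotly.graph_objects as go

    return go

With the marker in place, an environment without the optional dependencies could deselect these tests with a standard marker expression such as pytest -m "not noncore_dependency" (again an assumption about how CI consumes the marker; that configuration is not shown here).
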
1 change: 1 addition & 0 deletions Makefile
@@ -9,6 +9,7 @@ clean:
 .PHONY: lint
 lint:
 	isort --check-only evalml
+	sh ./import_or_skip.sh
 	python docs/notebook_version_standardizer.py check-versions
 	python docs/notebook_version_standardizer.py check-execution
 	black evalml -t py39 --check
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -11,6 +11,7 @@ Release Notes
         * Removed indices information from the output of ``HighlyNullDataCheck``'s ``validate()`` method :pr:`3092`
     * Documentation Changes
     * Testing Changes
+        * Refactored tests to avoid using ``importorskip`` :pr:`3126`
 
 .. warning::
 
41 changes: 11 additions & 30 deletions evalml/tests/automl_tests/test_automl_search_classification.py
@@ -595,11 +595,9 @@ def test_plot_disabled_missing_dependency(X_y_binary, has_minimal_dependencies):
         automl.plot.search_iteration_plot
 
 
-def test_plot_iterations_max_iterations(X_y_binary):
-    go = pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
+@pytest.mark.noncore_dependency
+def test_plot_iterations_max_iterations(X_y_binary, go):
+
     X, y = X_y_binary
 
     automl = AutoMLSearch(
@@ -623,11 +621,8 @@ def test_plot_iterations_max_iterations(X_y_binary):
     assert len(y) == 3
 
 
-def test_plot_iterations_max_time(AutoMLTestEnv, X_y_binary):
-    go = pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
+@pytest.mark.noncore_dependency
+def test_plot_iterations_max_time(AutoMLTestEnv, X_y_binary, go):
     X, y = X_y_binary
 
     automl = AutoMLSearch(
@@ -654,16 +649,9 @@ def test_plot_iterations_max_time(AutoMLTestEnv, X_y_binary):
     assert len(y) > 0
 
 
+@pytest.mark.noncore_dependency
 @patch("IPython.display.display")
 def test_plot_iterations_ipython_mock(mock_ipython_display, X_y_binary):
-    pytest.importorskip(
-        "IPython.display",
-        reason="Skipping plotting test because ipywidgets not installed",
-    )
-    pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
     X, y = X_y_binary
 
     automl = AutoMLSearch(
@@ -680,16 +668,11 @@ def test_plot_iterations_ipython_mock(mock_ipython_display, X_y_binary):
     mock_ipython_display.assert_called_with(plot.best_score_by_iter_fig)
 
 
+@pytest.mark.noncore_dependency
 @patch("IPython.display.display")
-def test_plot_iterations_ipython_mock_import_failure(mock_ipython_display, X_y_binary):
-    pytest.importorskip(
-        "IPython.display",
-        reason="Skipping plotting test because ipywidgets not installed",
-    )
-    go = pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
+def test_plot_iterations_ipython_mock_import_failure(
+    mock_ipython_display, X_y_binary, go
+):
     X, y = X_y_binary
 
     automl = AutoMLSearch(
@@ -1431,6 +1414,7 @@ def test_automl_search_dictionary_undersampler(
     assert len(mock_est_fit.call_args[0][0]) == length
 
 
+@pytest.mark.noncore_dependency
 @pytest.mark.parametrize(
     "problem_type,sampling_ratio_dict,length",
     [
@@ -1457,9 +1441,6 @@ def test_automl_search_dictionary_oversampler(
     sampling_ratio_dict,
     length,
 ):
-    pytest.importorskip(
-        "imblearn", reason="Skipping tests since imblearn isn't installed"
-    )
     # split this from the undersampler since the dictionaries are formatted differently
     X = pd.DataFrame({"a": [i for i in range(1200)], "b": [i % 3 for i in range(1200)]})
     if problem_type == "binary":
16 changes: 6 additions & 10 deletions evalml/tests/automl_tests/test_automl_search_regression.py
@@ -163,11 +163,9 @@ def test_plot_disabled_missing_dependency(X_y_regression, has_minimal_dependenci
         automl.plot.search_iteration_plot
 
 
-def test_plot_iterations_max_iterations(X_y_regression):
-    go = pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
+@pytest.mark.noncore_dependency
+def test_plot_iterations_max_iterations(X_y_regression, go):
+
     X, y = X_y_regression
 
     automl = AutoMLSearch(
@@ -186,11 +184,9 @@ def test_plot_iterations_max_iterations(X_y_regression):
     assert len(y) == 3
 
 
-def test_plot_iterations_max_time(AutoMLTestEnv, X_y_regression):
-    go = pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
+@pytest.mark.noncore_dependency
+def test_plot_iterations_max_time(AutoMLTestEnv, X_y_regression, go):
+
     X, y = X_y_regression
 
     automl = AutoMLSearch(
5 changes: 1 addition & 4 deletions evalml/tests/automl_tests/test_automl_utils.py
@@ -280,11 +280,8 @@ def test_get_best_sampler_for_data_sampler_method(
     assert name_output == "Oversampler"
 
 
+@pytest.mark.noncore_dependency
 def test_get_best_sampler_for_data_nonnumeric_noncategorical_columns(X_y_binary):
-    pytest.importorskip(
-        "imblearn.over_sampling",
-        reason="Skipping oversampling test because imbalanced-learn is not installed",
-    )
     X, y = X_y_binary
     X = pd.DataFrame(X)
     y = pd.Series([i % 5 == 0 for i in range(100)])
13 changes: 3 additions & 10 deletions evalml/tests/automl_tests/test_iterative_algorithm.py
@@ -351,20 +351,13 @@ def test_iterative_algorithm_passes_njobs(
         algo.add_result(score, pipeline, {"id": algo.pipeline_number})
 
 
-@patch("evalml.tuners.skopt_tuner.Optimizer.tell")
+@pytest.mark.noncore_dependency
 @pytest.mark.parametrize("is_regression", [True, False])
 @pytest.mark.parametrize("estimator", ["XGBoost", "CatBoost"])
+@patch("evalml.tuners.skopt_tuner.Optimizer.tell")
 def test_iterative_algorithm_passes_n_jobs_catboost_xgboost(
-    mock_opt_tell, X_y_binary, X_y_regression, is_regression, estimator
+    mock_opt_tell, is_regression, estimator, X_y_binary, X_y_regression
 ):
-    if estimator == "XGBoost":
-        pytest.importorskip(
-            "xgboost", reason="Skipping test because xgboost is not installed."
-        )
-    else:
-        pytest.importorskip(
-            "catboost", reason="Skipping test because catboost is not installed."
-        )
     if is_regression:
         X, y = X_y_regression
         component_graphs = {"graph": [f"{estimator} Regressor"]}
11 changes: 2 additions & 9 deletions evalml/tests/automl_tests/test_pipeline_search_plots.py
@@ -6,12 +6,8 @@
 from evalml.automl.pipeline_search_plots import SearchIterationPlot
 
 
+@pytest.mark.noncore_dependency
 def test_search_iteration_plot_class():
-    pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
-
     class MockObjective:
         def __init__(self):
             self.name = "Test Objective"
@@ -52,15 +48,12 @@ def __init__(self):
     assert y == [0.60, 0.75, 0.50]
 
 
+@pytest.mark.noncore_dependency
 @patch("evalml.automl.pipeline_search_plots.jupyter_check")
 @patch("evalml.automl.pipeline_search_plots.import_or_raise")
 def test_jupyter(import_check, jupyter_check):
     mock_data = MagicMock()
 
-    pytest.importorskip(
-        "plotly.graph_objects",
-        reason="Skipping plotting test because plotly not installed",
-    )
     jupyter_check.return_value = True
     with pytest.warns(None) as graph_valid:
         SearchIterationPlot(mock_data.results, mock_data.objective)
53 changes: 35 additions & 18 deletions evalml/tests/component_tests/test_arima_regressor.py
@@ -3,18 +3,26 @@
 import numpy as np
 import pandas as pd
 import pytest
-from pytest import importorskip
 
 from evalml.model_family import ModelFamily
 from evalml.pipelines.components import ARIMARegressor
 from evalml.problem_types import ProblemTypes
 
-sktime_arima = importorskip(
-    "sktime.forecasting.arima", reason="Skipping test because sktime not installed"
-)
-forecasting = importorskip(
-    "sktime.forecasting.base", reason="Skipping test because sktime not installed"
-)
+pytestmark = pytest.mark.noncore_dependency
+
+
+@pytest.fixture(scope="module")
+def sktime_arima():
+    from sktime.forecasting import arima as sktime_arima
+
+    return sktime_arima
+
+
+@pytest.fixture(scope="module")
+def forecasting():
+    from sktime.forecasting import base as forecasting
+
+    return forecasting
 
 
 def test_model_family():
@@ -55,7 +63,8 @@ def test_match_indices(ts_data):
 
 @pytest.mark.parametrize("predict", [True, False])
 @pytest.mark.parametrize("dates_shape", [0, 1, 2])
-def test_format_dates(predict, dates_shape, ts_data):
+def test_format_dates(predict, dates_shape, ts_data, forecasting):
+
     X, y = ts_data
     date_index = pd.date_range("2020-10-02", "2020-11-01")
     if dates_shape == 1:
@@ -114,8 +123,9 @@ def test_fit_predict_ts_with_datetime_in_X_column(
 
 
 def test_fit_predict_ts_with_only_datetime_column_in_X(
-    ts_data_seasonal_train, ts_data_seasonal_test
+    ts_data_seasonal_train, ts_data_seasonal_test, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
     X_test, y_test = ts_data_seasonal_test
     assert isinstance(X.index, pd.DatetimeIndex)
@@ -139,8 +149,9 @@ def test_fit_predict_ts_with_only_datetime_column_in_X(
 
 
 def test_fit_predict_ts_with_X_and_y_index_out_of_sample(
-    ts_data_seasonal_train, ts_data_seasonal_test
+    ts_data_seasonal_train, ts_data_seasonal_test, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
     X_test, y_test = ts_data_seasonal_test
     assert isinstance(X.index, pd.DatetimeIndex)
@@ -168,10 +179,9 @@ def test_fit_predict_ts_with_X_and_y_index_out_of_sample(
     "evalml.pipelines.components.estimators.regressors.arima_regressor.ARIMARegressor._get_dates"
 )
 def test_fit_predict_ts_with_X_and_y_index(
-    mock_get_dates,
-    mock_format_dates,
-    ts_data_seasonal_train,
+    mock_get_dates, mock_format_dates, ts_data_seasonal_train, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
     assert isinstance(X.index, pd.DatetimeIndex)
     assert isinstance(y.index, pd.DatetimeIndex)
@@ -202,8 +212,9 @@ def test_fit_predict_ts_with_X_and_y_index(
     "evalml.pipelines.components.estimators.regressors.arima_regressor.ARIMARegressor._get_dates"
 )
 def test_fit_predict_ts_with_X_not_y_index(
-    mock_get_dates, mock_format_dates, ts_data_seasonal_train
+    mock_get_dates, mock_format_dates, ts_data_seasonal_train, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
     assert isinstance(X.index, pd.DatetimeIndex)
     assert isinstance(y.index, pd.DatetimeIndex)
@@ -237,8 +248,9 @@ def test_fit_predict_ts_with_X_not_y_index(
     "evalml.pipelines.components.estimators.regressors.arima_regressor.ARIMARegressor._get_dates"
 )
 def test_fit_predict_ts_with_y_not_X_index(
-    mock_get_dates, mock_format_dates, ts_data_seasonal_train
+    mock_get_dates, mock_format_dates, ts_data_seasonal_train, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
 
     mock_get_dates.return_value = (y.index, X)
@@ -326,8 +338,9 @@ def test_fit_ts_without_y(ts_data):
 
 
 def test_fit_predict_ts_no_X_out_of_sample(
-    ts_data_seasonal_train, ts_data_seasonal_test
+    ts_data_seasonal_train, ts_data_seasonal_test, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
     X_test, y_test = ts_data_seasonal_test
 
@@ -348,8 +361,9 @@ def test_fit_predict_ts_no_X_out_of_sample(
 
 @pytest.mark.parametrize("X_none", [True, False])
 def test_fit_predict_date_index_named_out_of_sample(
-    X_none, ts_data_seasonal_train, ts_data_seasonal_test
+    X_none, ts_data_seasonal_train, ts_data_seasonal_test, sktime_arima, forecasting
 ):
+
     X, y = ts_data_seasonal_train
     X_test, y_test = ts_data_seasonal_test
 
@@ -380,7 +394,10 @@ def test_fit_predict_date_index_named_out_of_sample(
 
 @pytest.mark.parametrize("freq_num", ["1", "2"])
 @pytest.mark.parametrize("freq_str", ["T", "M", "Y"])
-def test_different_time_units_out_of_sample(freq_str, freq_num):
+def test_different_time_units_out_of_sample(
+    freq_str, freq_num, sktime_arima, forecasting
+):
+
     datetime_ = pd.date_range("1/1/1870", periods=20, freq=freq_num + freq_str)
 
     X = pd.DataFrame(range(20), index=datetime_)
4 changes: 2 additions & 2 deletions evalml/tests/component_tests/test_catboost_classifier.py
@@ -1,12 +1,12 @@
 import warnings
 
 import pandas as pd
-from pytest import importorskip
+import pytest
 
 from evalml.pipelines.components import CatBoostClassifier
 from evalml.utils import SEED_BOUNDS
 
-importorskip("catboost", reason="Skipping test because catboost not installed")
+pytestmark = pytest.mark.noncore_dependency
 
 
 def test_catboost_classifier_random_seed_bounds_seed(X_y_binary):
4 changes: 2 additions & 2 deletions evalml/tests/component_tests/test_catboost_regressor.py
@@ -1,12 +1,12 @@
 import warnings
 
 import pandas as pd
-from pytest import importorskip
+import pytest
 
 from evalml.pipelines.components import CatBoostRegressor
 from evalml.utils import SEED_BOUNDS
 
-importorskip("catboost", reason="Skipping test because catboost not installed")
+pytestmark = pytest.mark.noncore_dependency
 
 
 def test_catboost_regressor_random_seed_bounds_seed(X_y_regression):