Set up sharding during testing (#99)
d-a-bunin committed Oct 6, 2023
1 parent 588fe25 commit 029f0a9
Showing 16 changed files with 29 additions and 146 deletions.
119 changes: 6 additions & 113 deletions .github/workflows/test.yml
@@ -33,7 +33,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, '3.10']
python-version: ['3.8', '3.10']
shard-id: [0, 1, 2]
fail-fast: false

steps:
@@ -58,128 +59,19 @@ jobs:
path: .venv
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}

# TODO: remove pip install after optuna fix
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: |
poetry install -E "all tests" -vv
poetry run pip install "sqlalchemy>=1.4,<2"
- name: PyTest ("not long")
- name: PyTest with sharding
run: |
poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2" --ignore=tests/test_experimental --cov-report=xml --durations=10
poetry run pytest tests -v --shard-id=${{ matrix.shard-id }} --num-shards=3 --cov=etna --ignore=tests/test_experimental --cov-report=xml --durations=10
poetry run pytest etna -v --doctest-modules --ignore=etna/libs --durations=10
- name: Upload coverage
uses: codecov/codecov-action@v2

long-1-test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up Python
id: setup-python
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v2
with:
path: .venv
key: venv-${{ runner.os }}-3.8-${{ hashFiles('**/poetry.lock') }}

# TODO: remove pip install after optuna fix
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: |
poetry install -E "all tests" -vv
poetry run pip install "sqlalchemy>=1.4,<2"
- name: PyTest ("long")
run: |
poetry run pytest tests -v --cov=etna -m "long_1" --ignore=tests/test_experimental --cov-report=xml --durations=10
- name: Upload coverage
uses: codecov/codecov-action@v2

long-2-test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up Python
id: setup-python
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v2
with:
path: .venv
key: venv-${{ runner.os }}-3.8-${{ hashFiles('**/poetry.lock') }}

# TODO: remove pip install after optuna fix
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: |
poetry install -E "all tests" -vv
poetry run pip install "sqlalchemy>=1.4,<2"
- name: PyTest ("long")
run: |
poetry run pytest tests -v --cov=etna -m "long_2" --ignore=tests/test_experimental --cov-report=xml --durations=10
- name: Upload coverage
uses: codecov/codecov-action@v2

experimental-test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up Python
id: setup-python
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true

- name: Install dependencies
run: |
poetry install -E "all tests" -vv
- name: PyTest ("experimental")
run: |
poetry run pytest tests/test_experimental -v --cov=etna --cov-report=xml
- name: Upload coverage
uses: codecov/codecov-action@v2

test-pandas-versions:
runs-on: ubuntu-latest
strategy:
@@ -188,7 +80,8 @@ jobs:
- ">=1.1,<1.2"
- ">=1.2,<1.3"
- ">=1.3,<1.4"
- ">=1.4"
- ">=1.4,<1.5"
- ">=1.5,<2"
fail-fast: false

steps:
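The sharded job above replaces the separate long-test jobs: the matrix now fans out to 2 Python versions × 3 shards, and the `--shard-id`/`--num-shards` flags from pytest-shard (shown in the run step) give each job a disjoint, deterministic subset of the suite. As a rough sketch of the idea, not pytest-shard's exact implementation, sharding amounts to hashing each test's nodeid with a stable hash and keeping only the tests that land in the current shard:

```python
# Illustrative sketch of hash-based test sharding; pytest-shard's real
# selection logic may differ in detail (hash function, collection hook).
import hashlib

def shard_of(nodeid: str, num_shards: int) -> int:
    # SHA-256 is stable across processes, unlike Python's builtin hash()
    # with randomized seeds, so every CI job agrees on the assignment.
    digest = hashlib.sha256(nodeid.encode("utf-8")).hexdigest()
    return int(digest, 16) % num_shards

# Example nodeids (illustrative, not real test ids from the suite).
tests = [
    "tests/test_models/test_prophet.py::test_fit",
    "tests/test_models/nn/test_rnn.py::test_overfit",
    "tests/test_ensembles/test_stacking_ensemble.py::test_backtest",
]
for shard_id in range(3):
    kept = [t for t in tests if shard_of(t, 3) == shard_id]
    print(f"shard {shard_id}: {kept}")
```

A single shard can presumably be reproduced locally with the same flags the workflow uses, e.g. `poetry run pytest tests --shard-id=0 --num-shards=3`.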
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Rework `get_started` notebook ([#1343](https://github.com/tinkoff-ai/etna/pull/1343))
- Add missing classes from decomposition into API Reference, add modules into page titles in API Reference ([#61](https://github.com/etna-team/etna/pull/61))
- Update `CONTRIBUTING.md` with scenarios of documentation updates and release instruction ([#77](https://github.com/etna-team/etna/pull/77))
- Set up sharding for running tests ([#99](https://github.com/etna-team/etna/pull/99))

### Fixed
- Fix `ResampleWithDistributionTransform` working with categorical columns ([#82](https://github.com/etna-team/etna/pull/82))
21 changes: 18 additions & 3 deletions poetry.lock


9 changes: 4 additions & 5 deletions pyproject.toml
@@ -95,6 +95,7 @@ sphinx-design = {version = "^0.5.0", optional = true}
pytest = {version = "^6.2", optional = true}
coverage = {version = "^5.4", optional = true}
pytest-cov = {version = "^2.11.1", optional = true}
pytest-shard = {version = "^0.1.2", optional = true}

black = {extras = ["jupyter"], version = "^22.3.0", optional = true}
isort = {version = "^5.8.0", optional = true}
@@ -129,7 +130,7 @@ statsforecast = ["statsforecast"]
# dev deps
release = ["click", "semver"]
docs = ["Sphinx", "nbsphinx", "sphinx-mathjax-offline", "myst-parser", "GitPython", "pydata-sphinx-theme", "sphinx-design", "jupyter"]
tests = ["pytest-cov", "coverage", "pytest"]
tests = ["pytest-cov", "coverage", "pytest", "pytest-shard"]
jupyter = ["jupyter", "nbconvert", "black"]
style = ["black", "isort", "flake8", "pep8-naming", "flake8-docstrings", "mypy", "types-PyYAML", "codespell", "flake8-bugbear", "flake8-comprehensions", "types-setuptools", "nbqa"]

@@ -149,7 +150,7 @@ all-dev = [
"optuna", "sqlalchemy",
"click", "semver",
"Sphinx", "nbsphinx", "sphinx-mathjax-offline", "myst-parser", "GitPython", "pydata-sphinx-theme", "sphinx-design",
"pytest-cov", "coverage", "pytest",
"pytest-cov", "coverage", "pytest", "pytest-shard",
"black", "isort", "flake8", "pep8-naming", "flake8-docstrings", "mypy", "types-PyYAML", "codespell", "flake8-bugbear", "flake8-comprehensions", "types-setuptools", "nbqa",
"click", "semver",
"jupyter", "nbconvert",
@@ -270,9 +271,7 @@ doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL NUMBER"
# "ignore: All-NaN slice encountered",
#]
markers = [
"smoke",
"long_1",
"long_2"
"smoke"
]

[tool.coverage.report]
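The `markers` list in `[tool.pytest.ini_options]` registers the markers pytest accepts; with `long_1` and `long_2` gone from it, any leftover `@pytest.mark.long_*` decorator would trigger an unknown-marker warning (an error under `--strict-markers`), which is why those decorators are removed from the test files below. A minimal, hypothetical usage of the one custom marker that stays registered:

```python
# Hypothetical example: "smoke" is the only custom marker still
# registered in pyproject.toml, selectable via `pytest -m smoke`.
import pytest

@pytest.mark.smoke
def test_import_smoke():
    import etna  # quick sanity check that the package imports

    assert etna is not None
```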
3 changes: 0 additions & 3 deletions tests/test_auto/test_pool/test_pool.py
@@ -1,7 +1,5 @@
from copy import deepcopy

import pytest

from etna.auto.pool import Pool
from etna.auto.pool.templates import DEFAULT
from etna.datasets import TSDataset
@@ -13,7 +11,6 @@ def test_generate_config():
assert len(pipelines) == len(DEFAULT)


@pytest.mark.long_2
def test_default_pool_fit_predict(example_reg_tsds):
horizon = 7
pipelines = Pool.default.value.generate(horizon=horizon)
3 changes: 0 additions & 3 deletions tests/test_ensembles/test_stacking_ensemble.py
@@ -299,7 +299,6 @@ def test_forecast_sanity(weekly_period_ts: Tuple["TSDataset", "TSDataset"], naiv
np.allclose(mae(test, forecast), 0)


@pytest.mark.long_1
def test_multiprocessing_ensembles(
simple_df: TSDataset,
catboost_pipeline: Pipeline,
@@ -321,7 +320,6 @@
assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all()


@pytest.mark.long_1
@pytest.mark.parametrize("n_jobs", (1, 5))
def test_backtest(stacking_ensemble_pipeline: StackingEnsemble, example_tsds: TSDataset, n_jobs: int):
"""Check that backtest works with StackingEnsemble."""
@@ -369,7 +367,6 @@ def test_predict_with_return_components_fails(example_tsds, naive_ensemble):
naive_ensemble.predict(ts=example_tsds, return_components=True)


@pytest.mark.long_1
@pytest.mark.parametrize("n_jobs", (1, 4))
def test_ts_with_segment_named_target(
ts_with_segment_named_target: TSDataset, stacking_ensemble_pipeline: StackingEnsemble, n_jobs: int
2 changes: 0 additions & 2 deletions tests/test_ensembles/test_voting_ensemble.py
@@ -183,7 +183,6 @@ def test_predict_calls_vote(example_tsds: TSDataset, naive_pipeline_1: Pipeline,
assert result == ensemble._vote.return_value


@pytest.mark.long_1
def test_multiprocessing_ensembles(
simple_df: TSDataset,
catboost_pipeline: Pipeline,
@@ -205,7 +204,6 @@
assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all()


@pytest.mark.long_1
@pytest.mark.parametrize("n_jobs", (1, 5))
def test_backtest(voting_ensemble_pipeline: VotingEnsemble, example_tsds: TSDataset, n_jobs: int):
"""Check that backtest works with VotingEnsemble."""
2 changes: 0 additions & 2 deletions tests/test_models/nn/test_deepar.py
@@ -28,7 +28,6 @@ def _get_default_dataset_builder(horizon: int):
)


@pytest.mark.long_2
@pytest.mark.parametrize("horizon", [8, 21])
def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon, encoder_length=21):
"""
@@ -67,7 +66,6 @@ def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon, encoder_leng
assert mae(ts_test, ts_pred) < 0.2207


@pytest.mark.long_2
@pytest.mark.parametrize("horizon", [8])
def test_deepar_model_run_weekly_overfit_with_scaler(
ts_dataset_weekly_function_with_horizon, horizon, encoder_length=21
2 changes: 0 additions & 2 deletions tests/test_models/nn/test_patchts.py
@@ -10,7 +10,6 @@
from tests.test_models.utils import assert_sampling_is_valid


@pytest.mark.long_2
@pytest.mark.parametrize(
"horizon",
[8, 13, 15],
@@ -33,7 +32,6 @@ def test_patchts_model_run_weekly_overfit_with_scaler_small_patch(ts_dataset_wee
assert mae(ts_test, future) < 0.9


@pytest.mark.long_2
@pytest.mark.parametrize(
"horizon",
[8, 13, 15],
1 change: 0 additions & 1 deletion tests/test_models/nn/test_rnn.py
@@ -11,7 +11,6 @@
from tests.test_models.utils import assert_sampling_is_valid


@pytest.mark.long_2
@pytest.mark.parametrize(
"horizon",
[
2 changes: 0 additions & 2 deletions tests/test_models/nn/test_tft.py
@@ -26,7 +26,6 @@ def _get_default_dataset_builder(horizon: int):
)


@pytest.mark.long_2
@pytest.mark.parametrize("horizon", [8, 21])
def test_tft_model_run_weekly_overfit(ts_dataset_weekly_function_with_horizon, horizon, encoder_length=21):
"""
@@ -60,7 +59,6 @@ def test_tft_model_run_weekly_overfit(ts_dataset_weekly_function_with_horizon, h
assert mae(ts_test, ts_pred) < 0.24


@pytest.mark.long_2
@pytest.mark.parametrize("horizon", [8])
def test_tft_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon, encoder_length=21):
"""
1 change: 0 additions & 1 deletion tests/test_models/test_prophet.py
@@ -349,7 +349,6 @@ def test_predict_components_names(
assert set(components.columns) == expected_columns


@pytest.mark.long_1
@pytest.mark.parametrize("growth,cap", (("linear", []), ("logistic", ["cap"])))
@pytest.mark.parametrize("regressors", (["f1", "f2"], []))
@pytest.mark.parametrize("custom_seas", ([{"name": "s1", "period": 14, "fourier_order": 1}], []))
1 change: 0 additions & 1 deletion tests/test_models/test_sarimax_model.py
@@ -222,7 +222,6 @@ def test_components_names(dfs_w_exog, regressors, regressors_components, trend,
assert sorted(components.columns) == sorted(expected_components)


@pytest.mark.long_2
@pytest.mark.parametrize(
"components_method_name,predict_method_name,in_sample",
(("predict_components", "predict", True), ("forecast_components", "forecast", False)),
