From b746f33fc66ce2ecdc6d72a9943fc2db00da0f45 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Tue, 8 Sep 2020 07:36:12 +0200 Subject: [PATCH] Removes stable tests from quarantine (#10768) We've observed the tests for last couple of weeks and it seems most of the tests marked with "quarantine" marker are succeeding in a stable way (https://github.com/apache/airflow/issues/10118) The removed tests have success ratio of > 95% (20 runs without problems) and this has been verified a week ago as well, so it seems they are rather stable. There are literally few that are either failing or causing the Quarantined builds to hang. I manually reviewed the master tests that failed for last few weeks and added the tests that are causing the build to hang. Seems that stability has improved - which might be casued by some temporary problems when we marked the quarantined builds or too "generous" way of marking test as quarantined, or maybe improvement comes from the #10368 as the docker engine and machines used to run the builds in GitHub experience far less load (image builds are executed in separate builds) so it might be that resource usage is decreased. Another reason might be Github Actions stability improvements. Or simply those tests are more stable when run isolation. We might still add failing tests back as soon we see them behave in a flaky way. The remaining quarantined tests that need to be fixed: * test_local_run (often hangs the build) * test_retry_handling_job * test_clear_multiple_external_task_marker * test_should_force_kill_process * test_change_state_for_tis_without_dagrun * test_cli_webserver_background We also move some of those tests to "heisentests" category Those testst run fine in isolation but fail the builds when run with all other tests: * TestImpersonation tests We might find that those heisentest can be fixed but for now we are going to run them in isolation. Also - since those quarantined tests are failing more often the "num runs" to track for those has been decreased to 10 to keep track of 10 last runs only. --- .github/workflows/ci.yml | 8 ++++---- TESTING.rst | 12 ++++++++++++ scripts/ci/docker-compose/base.yml | 1 + scripts/ci/testing/ci_run_airflow_testing.sh | 6 ++---- scripts/in_container/entrypoint_ci.sh | 9 +++++++++ tests/cli/commands/test_dag_command.py | 2 -- tests/cli/commands/test_task_command.py | 1 - tests/conftest.py | 18 ++++++++++++++++++ tests/executors/test_dask_executor.py | 3 --- tests/jobs/test_backfill_job.py | 1 - tests/jobs/test_local_task_job.py | 2 -- tests/jobs/test_scheduler_job.py | 5 +---- tests/sensors/test_timeout_sensor.py | 3 --- tests/test_impersonation.py | 2 +- tests/utils/test_process_utils.py | 2 +- tests/www/test_views.py | 1 - 16 files changed, 49 insertions(+), 27 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61ae572d72236..bf89cdd3a5615 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -298,7 +298,7 @@ jobs: matrix: python-version: [3.6, 3.7] postgres-version: [9.6, 10] - test-type: [Core, Integration] + test-type: [Core, Integration, Heisentests] fail-fast: false env: BACKEND: postgres @@ -344,7 +344,7 @@ jobs: matrix: python-version: [3.7, 3.8] mysql-version: [5.7] - test-type: [Core, Integration] + test-type: [Core, Integration, Heisentests] fail-fast: false env: BACKEND: mysql @@ -388,7 +388,7 @@ jobs: strategy: matrix: python-version: [3.6, 3.8] - test-type: [Core, Integration] + test-type: [Core, Integration, Heisentests] fail-fast: false env: BACKEND: sqlite @@ -439,7 +439,7 @@ jobs: POSTGRES_VERSION: ${{ matrix.postgres-version }} RUN_TESTS: true TEST_TYPE: Quarantined - NUM_RUNS: 20 + NUM_RUNS: 10 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} if: > (needs.trigger-tests.outputs.run-tests == 'true' || github.event_name != 'pull_request') && diff --git a/TESTING.rst b/TESTING.rst index 93649895f6e30..7b3386af8a149 100644 --- a/TESTING.rst +++ b/TESTING.rst @@ -324,6 +324,18 @@ Those tests are marked with ``@pytest.mark.quarantined`` annotation. Those tests are skipped by default. You can enable them with ``--include-quarantined`` flag. You can also decide to only run tests with ``-m quarantined`` flag to run only those tests. +Heisen tests +------------ + +Some of our tests are Heisentests. This means that they run fine in isolation but when they run together with +others they might fail the tests (this is likely due to resource consumptions). Therefore we run those tests +in isolation. + +Those tests are marked with ``@pytest.mark.heisentests`` annotation. +Those tests are skipped by default. You can enable them with ``--include-heisentests`` flag. You +can also decide to only run tests with ``-m heisentests`` flag to run only those tests. + + Running Tests with Kubernetes ============================= diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml index f20be68c9c7cb..77792de458f87 100644 --- a/scripts/ci/docker-compose/base.yml +++ b/scripts/ci/docker-compose/base.yml @@ -41,6 +41,7 @@ services: - RUN_INTEGRATION_TESTS - ONLY_RUN_LONG_RUNNING_TESTS - ONLY_RUN_QUARANTINED_TESTS + - ONLY_RUN_HEISEN_TESTS - GITHUB_TOKEN - GITHUB_REPOSITORY - ISSUE_ID diff --git a/scripts/ci/testing/ci_run_airflow_testing.sh b/scripts/ci/testing/ci_run_airflow_testing.sh index 38df1425ba60c..62f0943984661 100755 --- a/scripts/ci/testing/ci_run_airflow_testing.sh +++ b/scripts/ci/testing/ci_run_airflow_testing.sh @@ -26,6 +26,8 @@ if [[ ${TEST_TYPE:=} == "Integration" ]]; then export RUN_INTEGRATION_TESTS="${AVAILABLE_INTEGRATIONS}" elif [[ ${TEST_TYPE:=} == "Long" ]]; then export ONLY_RUN_LONG_RUNNING_TESTS="true" +elif [[ ${TEST_TYPE:=} == "Heisentests" ]]; then + export ONLY_RUN_HEISEN_TESTS="true" elif [[ ${TEST_TYPE:=} == "Quarantined" ]]; then export ONLY_RUN_QUARANTINED_TESTS="true" # Do not fail in quarantined tests @@ -128,7 +130,3 @@ echo RUN_INTEGRATION_TESTS=${RUN_INTEGRATION_TESTS:=""} run_airflow_testing_in_docker "${@}" - -if [[ ${TEST_TYPE:=} == "Quarantined" ]]; then - export ONLY_RUN_QUARANTINED_TESTS="true" -fi diff --git a/scripts/in_container/entrypoint_ci.sh b/scripts/in_container/entrypoint_ci.sh index 25fb4806a2a56..55cfc827d3851 100755 --- a/scripts/in_container/entrypoint_ci.sh +++ b/scripts/in_container/entrypoint_ci.sh @@ -216,6 +216,15 @@ elif [[ ${ONLY_RUN_LONG_RUNNING_TESTS:=""} == "true" ]]; then "--execution-timeout=120" "--teardown-timeout=30" ) +elif [[ ${ONLY_RUN_HEISEN_TESTS:=""} == "true" ]]; then + EXTRA_PYTEST_ARGS+=( + "-m" "heisentests" + "--include-heisentests" + "--verbosity=1" + "--setup-timeout=20" + "--execution-timeout=50" + "--teardown-timeout=20" + ) elif [[ ${ONLY_RUN_QUARANTINED_TESTS:=""} == "true" ]]; then EXTRA_PYTEST_ARGS+=( "-m" "quarantined" diff --git a/tests/cli/commands/test_dag_command.py b/tests/cli/commands/test_dag_command.py index 03579ad76da86..7e776c5c11d25 100644 --- a/tests/cli/commands/test_dag_command.py +++ b/tests/cli/commands/test_dag_command.py @@ -23,7 +23,6 @@ from datetime import datetime, timedelta import mock -import pytest from airflow import settings from airflow.cli import cli_parser @@ -381,7 +380,6 @@ def test_pause(self): dag_command.dag_unpause(args) self.assertIn(self.dagbag.dags['example_bash_operator'].get_is_paused(), [False, 0]) - @pytest.mark.quarantined def test_trigger_dag(self): dag_command.dag_trigger(self.parser.parse_args([ 'dags', 'trigger', 'example_bash_operator', diff --git a/tests/cli/commands/test_task_command.py b/tests/cli/commands/test_task_command.py index 5d4918a2bb4d5..0a4b991667749 100644 --- a/tests/cli/commands/test_task_command.py +++ b/tests/cli/commands/test_task_command.py @@ -355,7 +355,6 @@ def setUp(self): self.parser = cli_parser.get_parser() - @pytest.mark.quarantined def test_run_ignores_all_dependencies(self): """ Test that run respects ignore_all_dependencies diff --git a/tests/conftest.py b/tests/conftest.py index eac88d6271903..20c072bd4099e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -154,6 +154,11 @@ def pytest_addoption(parser): action="store_true", help="Includes quarantined tests (marked with quarantined marker). They are skipped by default.", ) + group.addoption( + "--include-heisentests", + action="store_true", + help="Includes heisentests (marked with heisentests marker). They are skipped by default.", + ) allowed_trace_sql_columns_list = ",".join(ALLOWED_TRACE_SQL_COLUMNS) group.addoption( "--trace-sql", @@ -245,6 +250,9 @@ def pytest_configure(config): config.addinivalue_line( "markers", "quarantined: mark test that are in quarantine (i.e. flaky, need to be isolated and fixed)" ) + config.addinivalue_line( + "markers", "heisentests: mark test that should be run in isolation due to resource consumption" + ) config.addinivalue_line( "markers", "credential_file(name): mark tests that require credential file in CREDENTIALS_DIR" ) @@ -308,6 +316,13 @@ def skip_quarantined_test(item): format(item=item)) +def skip_heisen_test(item): + for _ in item.iter_markers(name="heisentests"): + pytest.skip("The test is skipped because it has heisentests marker. " + "And --include-heisentests flag is passed to pytest. {item}". + format(item=item)) + + def skip_if_integration_disabled(marker, item): integration_name = marker.args[0] environment_variable_name = "INTEGRATION_" + integration_name.upper() @@ -355,6 +370,7 @@ def pytest_runtest_setup(item): include_long_running = item.config.getoption("--include-long-running") include_quarantined = item.config.getoption("--include-quarantined") + include_heisentests = item.config.getoption("--include-heisentests") for marker in item.iter_markers(name="integration"): skip_if_integration_disabled(marker, item) @@ -373,6 +389,8 @@ def pytest_runtest_setup(item): skip_long_running_test(item) if not include_quarantined: skip_quarantined_test(item) + if not include_heisentests: + skip_heisen_test(item) skip_if_credential_file_missing(item) skip_if_airflow_2_test(item) diff --git a/tests/executors/test_dask_executor.py b/tests/executors/test_dask_executor.py index ea6e4f6f827b5..2f43271acb4a8 100644 --- a/tests/executors/test_dask_executor.py +++ b/tests/executors/test_dask_executor.py @@ -19,8 +19,6 @@ from datetime import timedelta from unittest import mock -import pytest - from airflow.jobs.backfill_job import BackfillJob from airflow.models import DagBag from airflow.utils import timezone @@ -83,7 +81,6 @@ def test_dask_executor_functions(self): executor = DaskExecutor(cluster_address=self.cluster.scheduler_address) self.assert_tasks_on_executor(executor) - @pytest.mark.quarantined def test_backfill_integration(self): """ Test that DaskExecutor can be used to backfill example dags diff --git a/tests/jobs/test_backfill_job.py b/tests/jobs/test_backfill_job.py index 7962172cd92e6..04abe9d757566 100644 --- a/tests/jobs/test_backfill_job.py +++ b/tests/jobs/test_backfill_job.py @@ -52,7 +52,6 @@ DEFAULT_DATE = timezone.datetime(2016, 1, 1) -@pytest.mark.quarantined class TestBackfillJob(unittest.TestCase): def _get_dummy_dag(self, dag_id, pool=Pool.DEFAULT_POOL_NAME, task_concurrency=None): diff --git a/tests/jobs/test_local_task_job.py b/tests/jobs/test_local_task_job.py index 1a8b4f9ccdb5c..09c18eaac3098 100644 --- a/tests/jobs/test_local_task_job.py +++ b/tests/jobs/test_local_task_job.py @@ -255,7 +255,6 @@ def test_localtaskjob_double_trigger(self): session.close() - @pytest.mark.quarantined def test_localtaskjob_maintain_heart_rate(self): dagbag = DagBag( dag_folder=TEST_DAG_FOLDER, @@ -360,7 +359,6 @@ def task_function(ti): self.assertNotIn('reached_end_of_sleep', data, 'Task should not have been allowed to run to completion') - @pytest.mark.quarantined def test_mark_success_on_success_callback(self): """ Test that ensures that where a task is marked suceess in the UI diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index 2f6fa6db5c53d..00325fe35ba36 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -1241,7 +1241,7 @@ def test_should_mark_dummy_task_as_success(self): self.assertIsNone(duration) -@pytest.mark.quarantined +@pytest.mark.heisentests class TestDagFileProcessorQueriesCount(unittest.TestCase): """ These tests are designed to detect changes in the number of queries for different DAG files. @@ -2154,7 +2154,6 @@ def test_execute_task_instances_limit(self): self.assertEqual(State.QUEUED, ti.state) @pytest.mark.quarantined - @pytest.mark.xfail(condition=True, reason="The test is flaky with nondeterministic result") def test_change_state_for_tis_without_dagrun(self): dag1 = DAG(dag_id='test_change_state_for_tis_without_dagrun', start_date=DEFAULT_DATE) @@ -2937,7 +2936,6 @@ def do_schedule(mock_dagbag): ti.refresh_from_db() self.assertEqual(State.SUCCESS, ti.state) - @pytest.mark.quarantined def test_retry_still_in_executor(self): """ Checks if the scheduler does not put a task in limbo, when a task is retried @@ -3025,7 +3023,6 @@ def run_with_error(ti, ignore_ti_state=False): self.assertEqual(ti.state, State.SUCCESS) @pytest.mark.quarantined - @pytest.mark.xfail(condition=True, reason="This test is failing!") def test_retry_handling_job(self): """ Integration test of the scheduler not accidentally resetting diff --git a/tests/sensors/test_timeout_sensor.py b/tests/sensors/test_timeout_sensor.py index 70228ddc48584..09b35b3ad2247 100644 --- a/tests/sensors/test_timeout_sensor.py +++ b/tests/sensors/test_timeout_sensor.py @@ -19,8 +19,6 @@ import unittest from datetime import timedelta -import pytest - from airflow.exceptions import AirflowSensorTimeout, AirflowSkipException from airflow.models.dag import DAG from airflow.sensors.base_sensor_operator import BaseSensorOperator @@ -73,7 +71,6 @@ def setUp(self): } self.dag = DAG(TEST_DAG_ID, default_args=args) - @pytest.mark.quarantined def test_timeout(self): op = TimeoutTestSensor( task_id='test_timeout', diff --git a/tests/test_impersonation.py b/tests/test_impersonation.py index 56c5cdf43b0c3..30df63c06bd06 100644 --- a/tests/test_impersonation.py +++ b/tests/test_impersonation.py @@ -109,7 +109,7 @@ def create_user(): ) -@pytest.mark.quarantined +@pytest.mark.heisentests class TestImpersonation(unittest.TestCase): def setUp(self): diff --git a/tests/utils/test_process_utils.py b/tests/utils/test_process_utils.py index e67afc97ffd96..1620bfe747bf9 100644 --- a/tests/utils/test_process_utils.py +++ b/tests/utils/test_process_utils.py @@ -125,7 +125,6 @@ def my_sleep_subprocess_with_signals(): sleep(100) -@pytest.mark.quarantined class TestKillChildProcessesByPids(unittest.TestCase): def test_should_kill_process(self): before_num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n") @@ -142,6 +141,7 @@ def test_should_kill_process(self): num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n") self.assertEqual(before_num_process, num_process) + @pytest.mark.quarantined def test_should_force_kill_process(self): before_num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n") diff --git a/tests/www/test_views.py b/tests/www/test_views.py index 348d3a9accd22..f276dd8738bdc 100644 --- a/tests/www/test_views.py +++ b/tests/www/test_views.py @@ -2411,7 +2411,6 @@ def test_user_defined_filter_and_macros_raise_error(self): ) -@pytest.mark.quarantined class TestTriggerDag(TestBase): def setUp(self):