From 7c728b2c2d6cbe2b8310cd8cf8aa4c2528b489c8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 5 May 2024 23:01:01 -0700 Subject: [PATCH] [SPARK-48137][INFRA] Run `yarn` test only in PR builders and Daily CIs ### What changes were proposed in this pull request? We have been providing a dedicated test environment for the `yarn` and `connect` modules because they are flaky. - #45107 However, they are still flaky. So, this PR aims to run the `yarn` test only in PR builders (if needed) and Daily CIs (always). - Reduce irrelevant re-tries by triggering `YARN CI` only when we need to test the `YARN` module. - Protect YARN CI from `connect` flakiness by providing an independent GitHub Actions environment in PR Builders and Daily CIs. - Lastly, the commit builder will offload YARN module tests to the Daily CIs. ### Why are the changes needed? - PR builders provide extensive test coverage with YARN testing. - Daily CIs with YARN tests - NON-ANSI CI: https://github.com/apache/spark/actions/workflows/build_non_ansi.yml (1AM) - Java 21 SBT CI: https://github.com/apache/spark/actions/workflows/build_java21.yml (4AM) - RocksDB UI CI: https://github.com/apache/spark/actions/workflows/build_rockdb_as_ui_backend.yml (6AM) - Maven Java 17 CI: https://github.com/apache/spark/actions/workflows/build_maven.yml (1PM) - Maven Java 21 CI: https://github.com/apache/spark/actions/workflows/build_maven_java21.yml (2PM) - Maven Java 21 on AppleSilicon CI: https://github.com/apache/spark/actions/workflows/build_maven_java21_macos14.yml (8PM every two days) - YARN CI has been flaky in the GitHub Actions environment and requires irrelevant re-tries very frequently. 
- https://github.com/apache/spark/actions/runs/8962451417/job/24611353908 (2024-05-05) - https://github.com/apache/spark/actions/runs/8962440192/job/24611326971 (2024-05-05) ``` [info] *** 6 TESTS FAILED *** [error] Failed tests: [error] org.apache.spark.deploy.yarn.YarnClusterSuite [error] (yarn / Test / test) sbt.TestsFailedException: Tests unsuccessful ``` Screenshot 2024-05-05 at 20 12 28 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46395 from dongjoon-hyun/SPARK-48137. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 12 ++++++++++-- .github/workflows/build_java21.yml | 1 + .github/workflows/build_non_ansi.yml | 3 ++- .github/workflows/build_rockdb_as_ui_backend.yml | 3 ++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8a85d26c0eca4..4b4e34ecc014d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -80,12 +80,14 @@ jobs: pyspark=`./dev/is-changed.py -m $pyspark_modules` if [[ "${{ github.repository }}" != 'apache/spark' ]]; then pandas=$pyspark + yarn=`./dev/is-changed.py -m yarn` kubernetes=`./dev/is-changed.py -m kubernetes` sparkr=`./dev/is-changed.py -m sparkr` buf=true ui=true else pandas=false + yarn=false kubernetes=false sparkr=false buf=false @@ -102,6 +104,7 @@ jobs: \"tpcds-1g\": \"false\", \"docker-integration-tests\": \"false\", \"lint\" : \"true\", + \"yarn\" : \"$yarn\", \"k8s-integration-tests\" : \"$kubernetes\", \"buf\" : \"$buf\", \"ui\" : \"$ui\", @@ -155,8 +158,8 @@ jobs: - >- streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl, kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf - - >- - yarn, connect + - yarn + - connect # Here, we split Hive and SQL tests into some of slow ones and the 
rest of them. included-tags: [""] excluded-tags: [""] @@ -194,6 +197,11 @@ jobs: hive: hive2.3 excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.SlowSQLTest comment: "- other tests" + exclude: + # Always run if yarn == 'true', even infra-image is skip (such as non-master job) + # In practice, the build will run in individual PR, but not against the individual commit + # in Apache Spark repository. + - modules: ${{ fromJson(needs.precondition.outputs.required).yarn != 'true' && 'yarn' }} env: MODULES_TO_TEST: ${{ matrix.modules }} EXCLUDED_TAGS: ${{ matrix.excluded-tags }} diff --git a/.github/workflows/build_java21.yml b/.github/workflows/build_java21.yml index a2fb0e6e2c1d5..871e1a9c07ef0 100644 --- a/.github/workflows/build_java21.yml +++ b/.github/workflows/build_java21.yml @@ -47,6 +47,7 @@ jobs: "sparkr": "true", "tpcds-1g": "true", "docker-integration-tests": "true", + "yarn": "true", "k8s-integration-tests": "true", "buf": "true", "ui": "true" diff --git a/.github/workflows/build_non_ansi.yml b/.github/workflows/build_non_ansi.yml index ff3fda4625cc8..9026276903206 100644 --- a/.github/workflows/build_non_ansi.yml +++ b/.github/workflows/build_non_ansi.yml @@ -44,5 +44,6 @@ jobs: "pyspark": "true", "sparkr": "true", "tpcds-1g": "true", - "docker-integration-tests": "true" + "docker-integration-tests": "true", + "yarn": "true" } diff --git a/.github/workflows/build_rockdb_as_ui_backend.yml b/.github/workflows/build_rockdb_as_ui_backend.yml index a1cc34f7b54fc..96009c41dbbf9 100644 --- a/.github/workflows/build_rockdb_as_ui_backend.yml +++ b/.github/workflows/build_rockdb_as_ui_backend.yml @@ -42,5 +42,6 @@ jobs: { "build": "true", "pyspark": "true", - "sparkr": "true" + "sparkr": "true", + "yarn": "true" }