From 4f6f16c2ebfb693884e1f673acf7c0bb377ee17c Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 26 May 2026 06:41:00 +0000
Subject: [PATCH 1/2] [INFRA] Share SBT compile artifact with python hosted runner CI jobs

Generated-by: Claude Code (Opus 4.7)
---
 .../workflows/python_hosted_runner_test.yml | 98 +++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index a2466ac163ab7..7f86393451c81 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -56,8 +56,88 @@ on:
         type: string
         default: '{}'
 jobs:
+  # Precompile Spark with SBT once and publish target/ as an artifact for the
+  # matrix entries below to consume. Optional: any failure here degrades the
+  # matrix to its original local SBT build path.
+  precompile:
+    name: "Precompile Spark"
+    runs-on: ${{ inputs.os }}
+    # If this job fails or is cancelled, the matrix entries fall back to
+    # running the SBT build locally as before.
+    continue-on-error: true
+    env:
+      HADOOP_PROFILE: ${{ inputs.hadoop }}
+      HIVE_PROFILE: hive2.3
+      SPARK_LOCAL_IP: localhost
+      GITHUB_PREV_SHA: ${{ github.event.before }}
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v6
+      with:
+        fetch-depth: 0
+        repository: apache/spark
+        ref: ${{ inputs.branch }}
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      run: |
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+        git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+    - name: Cache SBT and Maven
+      # TODO(SPARK-54466): https://github.com/actions/runner-images/issues/13341
+      if: ${{ runner.os != 'macOS' }}
+      uses: actions/cache@v5
+      with:
+        path: |
+          build/apache-maven-*
+          build/*.jar
+          ~/.sbt
+        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+        restore-keys: |
+          build-
+    - name: Cache Coursier local repository
+      # TODO(SPARK-54466): https://github.com/actions/runner-images/issues/13341
+      if: ${{ runner.os != 'macOS' }}
+      uses: actions/cache@v5
+      with:
+        path: ~/.cache/coursier
+        key: precompile-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+        restore-keys: |
+          precompile-coursier-
+    - name: Install Java ${{ inputs.java }}
+      uses: actions/setup-java@v5
+      with:
+        distribution: zulu
+        java-version: ${{ inputs.java }}
+    - name: Build Spark
+      run: |
+        ./build/sbt -Phadoop-3 -Pyarn -Pspark-ganglia-lgpl -Phadoop-cloud -Phive \
+          -Pkubernetes -Pjvm-profiler -Pkinesis-asl -Phive-thriftserver \
+          -Pdocker-integration-tests -Pvolcano \
+          Test/package streaming-kinesis-asl-assembly/assembly connect/assembly assembly/package
+    - name: Package compile output
+      run: |
+        # Unless a shell is configured explicitly, run steps use `bash -e`, which
+        # has no pipefail: without this a failing find would still upload an archive.
+        set -o pipefail
+        # -prune stops find from descending into a target/ it has already selected;
+        # tar archives that directory recursively, so nested matches are redundant.
+        find . -type d -name target -not -path './build/*' -not -path './.git/*' -prune -print0 \
+          | tar --null -czf compile-artifact.tar.gz -T -
+        ls -lh compile-artifact.tar.gz
+    - name: Upload compile artifact
+      uses: actions/upload-artifact@v6
+      with:
+        name: spark-compile-${{ inputs.os }}-${{ inputs.branch }}-${{ github.run_id }}
+        path: compile-artifact.tar.gz
+        retention-days: 1
+        if-no-files-found: error
+
   build:
     name: "Build modules: ${{ matrix.modules }}"
+    needs: precompile
+    if: (!cancelled())
     runs-on: ${{ inputs.os }}
     # TODO(SPARK-54466): https://github.com/actions/runner-images/issues/13341
     # timeout-minutes: 150
@@ -159,10 +239,28 @@ jobs:
         python${{matrix.python}} -m pip cache purge
     - name: List Python packages
       run: python${{matrix.python}} -m pip list
+    - name: Download precompiled artifact
+      id: download-precompiled
+      if: needs.precompile.result == 'success'
+      continue-on-error: true
+      uses: actions/download-artifact@v6
+      with:
+        name: spark-compile-${{ inputs.os }}-${{ inputs.branch }}-${{ github.run_id }}
+    - name: Extract precompiled artifact
+      id: extract-precompiled
+      if: steps.download-precompiled.outcome == 'success'
+      continue-on-error: true
+      run: |
+        tar -xzf compile-artifact.tar.gz
+        rm compile-artifact.tar.gz
     # Run the tests.
     - name: Run tests
       env: ${{ fromJSON(inputs.envs) }}
       run: |
+        if [ "${{ steps.extract-precompiled.outcome }}" = "success" ]; then
+          export SKIP_SCALA_BUILD=true
+          echo "Reusing precompiled artifact, skipping local SBT build."
+        fi
         if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then
           export SKIP_PACKAGING=false
           echo "Python Packaging Tests Enabled!"

From d8ff2a7460d2db3deb9ed62bdf5c95517d38bf5e Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Tue, 26 May 2026 09:06:39 +0000
Subject: [PATCH 2/2] [TEMP] Trigger python_hosted_runner_test on macos-26 from PR builder

Validation-only commit. Revert before marking PR ready for review.

Generated-by: Claude Code (Opus 4.7)
---
 .github/workflows/build_and_test.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index fb55b970e3875..8ec823e80ac19 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1687,3 +1687,15 @@ jobs:
         cd ui-test
         npm install --save-dev
         node --experimental-vm-modules node_modules/.bin/jest
+
+  # TEMP for PR validation: exercise the python_hosted_runner_test.yml
+  # precompile changes on macos-26 as part of the PR builder. REVERT before
+  # marking this PR ready for review.
+  python-macos:
+    name: Python on macOS
+    uses: ./.github/workflows/python_hosted_runner_test.yml
+    with:
+      java: ${{ inputs.java }}
+      branch: ${{ inputs.branch }}
+      hadoop: ${{ inputs.hadoop }}
+      os: macos-26