Skip to content

Commit

Permalink
Merge branch 'master' into SPARK-40193-merge-filters
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-toth committed Nov 7, 2022
2 parents b53fc68 + 93d13af commit 56c287f
Show file tree
Hide file tree
Showing 1,384 changed files with 92,398 additions and 20,428 deletions.
14 changes: 10 additions & 4 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ SPARK SHELL:
- "repl/**/*"
- "bin/spark-shell*"
SQL:
#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/test_streaming.py"]
#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"]
- "**/sql/**/*"
- "common/unsafe/**/*"
#- "!python/pyspark/sql/avro/**/*"
#- "!python/pyspark/sql/streaming/**/*"
#- "!python/pyspark/sql/tests/test_streaming.py"
#- "!python/pyspark/sql/tests/streaming/test_streaming.py"
- "bin/spark-sql*"
- "bin/beeline*"
- "sbin/*thriftserver*.sh"
Expand Down Expand Up @@ -125,7 +125,7 @@ STRUCTURED STREAMING:
- "**/sql/**/streaming/**/*"
- "connector/kafka-0-10-sql/**/*"
- "python/pyspark/sql/streaming/**/*"
- "python/pyspark/sql/tests/test_streaming.py"
- "python/pyspark/sql/tests/streaming/test_streaming.py"
- "**/*streaming.R"
PYTHON:
- "bin/pyspark*"
Expand All @@ -151,4 +151,10 @@ WEB UI:
- "**/*UI.scala"
DEPLOY:
- "sbin/**/*"

CONNECT:
- "connector/connect/**/*"
- "**/sql/sparkconnect/**/*"
- "python/pyspark/sql/**/connect/**/*"
PROTOBUF:
- "connector/protobuf/**/*"
- "python/pyspark/sql/protobuf/**/*"
30 changes: 16 additions & 14 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
steps:
- name: Generate matrix
id: set-matrix
run: echo "::set-output name=matrix::["`seq -s, 1 $SPARK_BENCHMARK_NUM_SPLITS`"]"
run: echo "matrix=["`seq -s, 1 $SPARK_BENCHMARK_NUM_SPLITS`"]" >> $GITHUB_OUTPUT

# Any TPC-DS related updates on this job need to be applied to tpcds-1g job of build_and_test.yml as well
tpcds-1g-gen:
Expand All @@ -65,12 +65,12 @@ jobs:
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
uses: actions/checkout@v3
# In order to get diff files
with:
fetch-depth: 0
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: |
build/apache-maven-*
Expand All @@ -81,21 +81,21 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
benchmark-coursier-${{ github.event.inputs.jdk }}
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
repository: databricks/tpcds-kit
ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
Expand All @@ -105,8 +105,9 @@ jobs:
run: cd tpcds-kit/tools && make OS=LINUX
- name: Install Java ${{ github.event.inputs.jdk }}
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/setup-java@v1
uses: actions/setup-java@v3
with:
distribution: temurin
java-version: ${{ github.event.inputs.jdk }}
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
Expand All @@ -133,12 +134,12 @@ jobs:
SPARK_TPCDS_DATA: ${{ github.workspace }}/tpcds-sf-1
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
uses: actions/checkout@v3
# In order to get diff files
with:
fetch-depth: 0
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: |
build/apache-maven-*
Expand All @@ -149,27 +150,28 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
benchmark-coursier-${{ github.event.inputs.jdk }}
- name: Install Java ${{ github.event.inputs.jdk }}
uses: actions/setup-java@v1
uses: actions/setup-java@v3
with:
distribution: temurin
java-version: ${{ github.event.inputs.jdk }}
- name: Cache TPC-DS generated data
if: contains(github.event.inputs.class, 'TPCDSQueryBenchmark') || contains(github.event.inputs.class, '*')
id: cache-tpcds-sf-1
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Run benchmarks
run: |
dev/change-scala-version.sh ${{ github.event.inputs.scala }}
./build/sbt -Pscala-${{ github.event.inputs.scala }} -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl test:package
./build/sbt -Pscala-${{ github.event.inputs.scala }} -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl Test/package
# Make less noisy
cp conf/log4j2.properties.template conf/log4j2.properties
sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties
Expand All @@ -186,7 +188,7 @@ jobs:
echo "Preparing the benchmark results:"
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude-standard`
- name: Upload benchmark results
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}-${{ matrix.split }}
path: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar
Expand Down

0 comments on commit 56c287f

Please sign in to comment.