Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 64 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,64 @@ jobs:
id: set-matrix
run: echo "::set-output name=matrix::["`seq -s, 1 $SPARK_BENCHMARK_NUM_SPLITS`"]"

# Keep in sync: any TPC-DS related change made to this job must also be
# applied to the tpcds-1g job of build_and_test.yml.
tpcds-1g-gen:
  name: Generate an input dataset for TPCDSQueryBenchmark with SF=1
  runs-on: ubuntu-20.04
  env:
    SPARK_LOCAL_IP: localhost
  steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
      with:
        # Depth 0 is requested in order to get diff files.
        fetch-depth: 0

    # Build tooling cache, keyed on the build definition files.
    - name: Cache Scala, SBT and Maven
      uses: actions/cache@v2
      with:
        path: |
          build/apache-maven-*
          build/scala-*
          build/*.jar
          ~/.sbt
        key: >-
          build-${{ hashFiles('**/pom.xml', 'project/build.properties',
          'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash',
          'build/spark-build-info') }}
        restore-keys: |
          build-

    # Dependency cache, keyed per requested JDK.
    - name: Cache Coursier local repository
      uses: actions/cache@v2
      with:
        path: ~/.cache/coursier
        key: >-
          benchmark-coursier-${{ github.event.inputs.jdk }}-${{
          hashFiles('**/pom.xml', '**/plugins.sbt') }}
        restore-keys: |
          benchmark-coursier-${{ github.event.inputs.jdk }}

    # The cache-hit output of this step gates every step below: the dataset
    # is only regenerated when no cached copy was restored.
    - name: Cache TPC-DS generated data
      uses: actions/cache@v2
      id: cache-tpcds-sf-1
      with:
        path: ./tpcds-sf-1
        key: >-
          tpcds-${{ hashFiles('.github/workflows/benchmark.yml',
          'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}

    # tpcds-kit is checked out at a fixed commit into ./tpcds-kit.
    - name: Checkout tpcds-kit repository
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      uses: actions/checkout@v2
      with:
        repository: databricks/tpcds-kit
        ref: '2a5078a782192ddb6efbcead8de9973d6ab4f069'
        path: ./tpcds-kit

    - name: Build tpcds-kit
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      run: cd tpcds-kit/tools && make OS=LINUX

    - name: Install Java ${{ github.event.inputs.jdk }}
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      uses: actions/setup-java@v1
      with:
        java-version: '${{ github.event.inputs.jdk }}'

    # Folded scalar: the lines below are joined with single spaces into one
    # command line, so the quoted sbt argument stays a single argument.
    - name: Generate TPC-DS (SF=1) table data
      if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
      run: >-
        build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData
        --dsdgenDir `pwd`/tpcds-kit/tools
        --location `pwd`/tpcds-sf-1
        --scaleFactor 1
        --numPartitions 1
        --overwrite"

benchmark:
name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, Scala ${{ github.event.inputs.scala }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
needs: matrix-gen
needs: [matrix-gen, tpcds-1g-gen]
# Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
runs-on: ubuntu-20.04
strategy:
Expand All @@ -73,6 +128,7 @@ jobs:
SPARK_LOCAL_IP: localhost
# Prevents spark.test.home from being left unset. See SPARK-36007 for more details.
SPARK_HOME: ${{ github.workspace }}
SPARK_TPCDS_DATA: ${{ github.workspace }}/tpcds-sf-1
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
Expand Down Expand Up @@ -101,6 +157,12 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: ${{ github.event.inputs.jdk }}
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Run benchmarks
run: |
dev/change-scala-version.sh ${{ github.event.inputs.scala }}
Expand All @@ -119,7 +181,7 @@ jobs:
# To keep the directory structure and file permissions, tar them
# See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
echo "Preparing the benchmark results:"
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude-standard`
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude-standard`
- name: Upload benchmark results
uses: actions/upload-artifact@v2
with:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ jobs:
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile

# Any TPC-DS related updates on this job need to be applied to the tpcds-1g-gen job of benchmark.yml as well
tpcds-1g:
needs: precondition
if: fromJson(needs.precondition.outputs.required).tpcds-1g == 'true'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,6 @@ object Benchmarks {
require(args.length > 0, "Benchmark class to run should be specified.")
if (
info.getName.endsWith("Benchmark") &&
// TODO(SPARK-34927): Support TPCDSQueryBenchmark in Benchmarks
!info.getName.endsWith("TPCDSQueryBenchmark") &&
matcher.matches(Paths.get(info.getName)) &&
Try(runBenchmark).isSuccess && // Does this have a main method?
!Modifier.isAbstract(clazz.getModifiers) // Is this a regular class?
Expand Down
Loading