Skip to content

Commit

Permalink
[SPARK-34795][SPARK-35192][SPARK-35293][SPARK-35327][SQL][TESTS][3.1]…
Browse files Browse the repository at this point in the history
… Adds a new job in GitHub Actions to check the output of TPC-DS queries

### What changes were proposed in this pull request?

This PR proposes to add a new job in GitHub Actions to check the output of TPC-DS queries.

NOTE: To generate TPC-DS table data in GA jobs, this PR includes generator code implemented in apache#32243 and apache#32460.

This is the backport PR of apache#31886.

### Why are the changes needed?

There are some cases where we noticed runtime-related bugs after merging commits (e.g., SPARK-33822). Therefore, I think it is worth adding a new job in GitHub Actions to check the query output of TPC-DS (sf=1).

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

A new test was added.

Closes apache#32462 from maropu/TPCDSQUeryTestSuite-Branch3.1.

Authored-by: Takeshi Yamamuro <yamamuro@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
maropu authored and dongjoon-hyun committed May 9, 2021
1 parent 373454a commit 98674aa
Show file tree
Hide file tree
Showing 140 changed files with 18,614 additions and 579 deletions.
64 changes: 64 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,67 @@ jobs:
- name: Build with SBT
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
tpcds-1g:
# Job overview: obtains TPC-DS table data at scale factor 1 (restored from cache,
# or generated with tpcds-kit on a cache miss), then runs TPCDSQueryTestSuite
# against that data and uploads the resulting test reports and logs.
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-20.04
env:
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# Cache the build tooling under build/ and ~/.sbt. The key is a hash of the
# build definition files, with a fallback to any earlier "build-" entry.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
# Cache ~/.cache/coursier under a "tpcds-coursier-" key prefix that is
# specific to this job.
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
tpcds-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
# Cache the generated table data. The key hashes this workflow file and
# TPCDSSchema.scala, so a change to either one yields a new key and the data
# is regenerated. No restore-keys are given, so only an exact key match
# restores the cache.
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
# The next three steps (checkout, build, generate) run only when the data
# cache above reported no hit.
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: databricks/tpcds-kit
path: ./tpcds-kit
- name: Build tpcds-kit
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: cd tpcds-kit/tools && make OS=LINUX
# Runs GenTPCDSData with the tpcds-kit tools directory built above, writing
# to ./tpcds-sf-1, which is the same path the data cache step stores.
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
# SPARK_TPCDS_DATA is set to the data directory for this sbt invocation.
- name: Run TPC-DS queries
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
# always(): the XML test reports are uploaded even when an earlier step failed.
# NOTE(review): the "--" in the artifact name looks like an empty name segment;
# confirm it is intentional and matches the naming used by the other jobs.
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-tpcds--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
# failure(): the unit-test logs are uploaded only when an earlier step failed.
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"
105 changes: 105 additions & 0 deletions sql/core/src/test/resources/tpcds-query-results/v1_4/q1.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
-- Automatically generated by TPCDSQueryTestSuite

-- !query schema
struct<c_customer_id:string>
-- !query output
AAAAAAAAAAABAAAA
AAAAAAAAAAAHBAAA
AAAAAAAAAAAMAAAA
AAAAAAAAAAAOAAAA
AAAAAAAAAABCBAAA
AAAAAAAAAABEAAAA
AAAAAAAAAABFAAAA
AAAAAAAAAACFBAAA
AAAAAAAAAACFBAAA
AAAAAAAAAADBBAAA
AAAAAAAAAADOAAAA
AAAAAAAAAADPAAAA
AAAAAAAAAAEABAAA
AAAAAAAAAAEEAAAA
AAAAAAAAAAEGBAAA
AAAAAAAAAAENAAAA
AAAAAAAAAAFCBAAA
AAAAAAAAAAFEBAAA
AAAAAAAAAAFGAAAA
AAAAAAAAAAFLAAAA
AAAAAAAAAAFPAAAA
AAAAAAAAAAGCAAAA
AAAAAAAAAAGEAAAA
AAAAAAAAAAGIBAAA
AAAAAAAAAAGOAAAA
AAAAAAAAAAHABAAA
AAAAAAAAAAHGBAAA
AAAAAAAAAAHHAAAA
AAAAAAAAAAHMAAAA
AAAAAAAAAAHPAAAA
AAAAAAAAAAHPAAAA
AAAAAAAAAAHPAAAA
AAAAAAAAAAJAAAAA
AAAAAAAAAAJEBAAA
AAAAAAAAAAJMAAAA
AAAAAAAAAAJPAAAA
AAAAAAAAAAKBBAAA
AAAAAAAAAAKGBAAA
AAAAAAAAAAKHBAAA
AAAAAAAAAAKLAAAA
AAAAAAAAAALCAAAA
AAAAAAAAAALJAAAA
AAAAAAAAAALJAAAA
AAAAAAAAAAMABAAA
AAAAAAAAAAMGAAAA
AAAAAAAAAAMLAAAA
AAAAAAAAAAMMAAAA
AAAAAAAAAANHBAAA
AAAAAAAAAANMAAAA
AAAAAAAAAAOBBAAA
AAAAAAAAAAPDAAAA
AAAAAAAAAAPKAAAA
AAAAAAAAAAPLAAAA
AAAAAAAAABANAAAA
AAAAAAAAABCCBAAA
AAAAAAAAABCGAAAA
AAAAAAAAABDABAAA
AAAAAAAAABDBAAAA
AAAAAAAAABDEAAAA
AAAAAAAAABDEBAAA
AAAAAAAAABDEBAAA
AAAAAAAAABDFBAAA
AAAAAAAAABDOAAAA
AAAAAAAAABDOAAAA
AAAAAAAAABEBBAAA
AAAAAAAAABEDAAAA
AAAAAAAAABEEAAAA
AAAAAAAAABEEBAAA
AAAAAAAAABEIBAAA
AAAAAAAAABEOAAAA
AAAAAAAAABFFBAAA
AAAAAAAAABFHAAAA
AAAAAAAAABFNAAAA
AAAAAAAAABFOAAAA
AAAAAAAAABGAAAAA
AAAAAAAAABHDBAAA
AAAAAAAAABHGAAAA
AAAAAAAAABHGBAAA
AAAAAAAAABHLAAAA
AAAAAAAAABIAAAAA
AAAAAAAAABIBAAAA
AAAAAAAAABIDBAAA
AAAAAAAAABIEBAAA
AAAAAAAAABKLAAAA
AAAAAAAAABKNAAAA
AAAAAAAAABKNAAAA
AAAAAAAAABLJAAAA
AAAAAAAAABLNAAAA
AAAAAAAAABMAAAAA
AAAAAAAAABMEBAAA
AAAAAAAAABMPAAAA
AAAAAAAAABNABAAA
AAAAAAAAABNBAAAA
AAAAAAAAABNEAAAA
AAAAAAAAABNEAAAA
AAAAAAAAABNGAAAA
AAAAAAAAABNNAAAA
AAAAAAAAABOEAAAA
AAAAAAAAABOGBAAA
AAAAAAAAABPABAAA
11 changes: 11 additions & 0 deletions sql/core/src/test/resources/tpcds-query-results/v1_4/q10.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Automatically generated by TPCDSQueryTestSuite

-- !query schema
struct<cd_gender:string,cd_marital_status:string,cd_education_status:string,cnt1:bigint,cd_purchase_estimate:int,cnt2:bigint,cd_credit_rating:string,cnt3:bigint,cd_dep_count:int,cnt4:bigint,cd_dep_employed_count:int,cnt5:bigint,cd_dep_college_count:int,cnt6:bigint>
-- !query output
F W 4 yr Degree 1 4000 1 Low Risk 1 4 1 6 1 4 1
M D 4 yr Degree 1 1500 1 Low Risk 1 3 1 4 1 2 1
M S College 1 4500 1 High Risk 1 3 1 4 1 3 1
M S Primary 1 9500 1 Low Risk 1 3 1 0 1 6 1
M S Secondary 1 3000 1 High Risk 1 1 1 1 1 4 1
M U 4 yr Degree 1 2000 1 Low Risk 1 3 1 1 1 3 1
99 changes: 99 additions & 0 deletions sql/core/src/test/resources/tpcds-query-results/v1_4/q11.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
-- Automatically generated by TPCDSQueryTestSuite

-- !query schema
struct<customer_preferred_cust_flag:string>
-- !query output
NULL
NULL
NULL
NULL
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
N
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Y
Loading

0 comments on commit 98674aa

Please sign in to comment.