Skip to content

Commit

Permalink
[SPARK-42874][SQL] Enable new golden file test framework for analysis…
Browse files Browse the repository at this point in the history
… for all input files

### What changes were proposed in this pull request?

This PR enables the new golden file test framework for analysis for all input files.

Background:
* In #40449 we added the ability to exercise the analyzer on the SQL queries in existing golden files in the `sql/core/src/test/resources/sql-tests/inputs` directory, writing separate output test files in the new `sql/core/src/test/resources/sql-tests/analyzer-results` directory in addition to the original output directory for full end-to-end query execution results.
* That PR also added an allowlist of input files to include in this new dual-run mode.
* In this PR, we remove that allowlist and exercise the new dual-run mode for all the input files. We also extend the analyzer testing to support separate test cases in ANSI-mode, TimestampNTZ, and UDFs.

### Why are the changes needed?

This improves test coverage and helps prevent accidental regressions in the future as we edit the code.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

This PR adds testing only.

Closes #40496 from dtenedor/add-all-test-files.

Authored-by: Daniel Tenedorio <daniel.tenedorio@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
dtenedor authored and HyukjinKwon committed Mar 22, 2023
1 parent c75b689 commit 34c624e
Show file tree
Hide file tree
Showing 251 changed files with 154,290 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,16 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
transformAllExpressionsWithPruning(AlwaysProcess.fn, UnknownRuleId)(rule)
}

/**
 * Like [[transformAllExpressions]], but plan nodes inside subqueries are considered too.
 *
 * The traversal is delegated to `transformWithSubqueries`; for each plan node it hands
 * back, `rule` is applied through that node's `transformExpressions`.
 *
 * @param rule the partial function used to rewrite expressions
 * @return the result of the traversal, cast back to `this.type`
 */
def transformAllExpressionsWithSubqueries(
    rule: PartialFunction[Expression, Expression]): this.type = {
  val rewrittenPlan = transformWithSubqueries {
    // Matches every node: the expression rewrite is attempted on each plan node in turn.
    case planNode => planNode.transformExpressions(rule).asInstanceOf[PlanType]
  }
  rewrittenPlan.asInstanceOf[this.type]
}

/**
* Returns the result of running [[transformExpressionsWithPruning]] on this node
* and all its children. Note that this method skips expressions inside subqueries.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,7 @@ select
array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000')
from primitive_arrays
-- !query analysis
Project [array_contains(boolean_array#x, true) AS array_contains(boolean_array, true)#x, array_contains(boolean_array#x, false) AS array_contains(boolean_array, false)#x, array_contains(tinyint_array#x, 2) AS array_contains(tinyint_array, 2)#x, array_contains(tinyint_array#x, 0) AS array_contains(tinyint_array, 0)#x, array_contains(smallint_array#x, 2) AS array_contains(smallint_array, 2)#x, array_contains(smallint_array#x, 0) AS array_contains(smallint_array, 0)#x, array_contains(int_array#x, 2) AS array_contains(int_array, 2)#x, array_contains(int_array#x, 0) AS array_contains(int_array, 0)#x, array_contains(bigint_array#x, 2) AS array_contains(bigint_array, 2)#x, array_contains(bigint_array#x, 0) AS array_contains(bigint_array, 0)#x, array_contains(decimal_array#x, 9223372036854775809) AS array_contains(decimal_array, 9223372036854775809)#x, array_contains(decimal_array#x, cast(1 as decimal(19,0))) AS array_contains(decimal_array, 1)#x, array_contains(double_array#x, 2.0) AS array_contains(double_array, 2.0)#x, array_contains(double_array#x, 0.0) AS array_contains(double_array, 0.0)#x, array_contains(float_array#x, cast(2.0 as float)) AS array_contains(float_array, 2.0)#x, array_contains(float_array#x, cast(0.0 as float)) AS array_contains(float_array, 0.0)#x, array_contains(date_array#x, 2016-03-14) AS array_contains(date_array, DATE '2016-03-14')#x, array_contains(date_array#x, 2016-01-01) AS array_contains(date_array, DATE '2016-01-01')#x, array_contains(timestamp_array#x, 2016-11-15 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-11-15 20:54:00')#x, array_contains(timestamp_array#x, 2016-01-01 20:54:00) AS array_contains(timestamp_array, TIMESTAMP '2016-01-01 20:54:00')#x]
+- SubqueryAlias primitive_arrays
+- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x])
+- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x]
+- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x]
+- SubqueryAlias primitive_arrays
+- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x]
[Analyzer test output redacted due to nondeterminism]


-- !query
Expand Down Expand Up @@ -235,7 +229,7 @@ select
size(timestamp_array)
from primitive_arrays
-- !query analysis
Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_array#x, true) AS size(tinyint_array)#x, size(smallint_array#x, true) AS size(smallint_array)#x, size(int_array#x, true) AS size(int_array)#x, size(bigint_array#x, true) AS size(bigint_array)#x, size(decimal_array#x, true) AS size(decimal_array)#x, size(double_array#x, true) AS size(double_array)#x, size(float_array#x, true) AS size(float_array)#x, size(date_array#x, true) AS size(date_array)#x, size(timestamp_array#x, true) AS size(timestamp_array)#x]
Project [size(boolean_array#x, false) AS size(boolean_array)#x, size(tinyint_array#x, false) AS size(tinyint_array)#x, size(smallint_array#x, false) AS size(smallint_array)#x, size(int_array#x, false) AS size(int_array)#x, size(bigint_array#x, false) AS size(bigint_array)#x, size(decimal_array#x, false) AS size(decimal_array)#x, size(double_array#x, false) AS size(double_array)#x, size(float_array#x, false) AS size(float_array)#x, size(date_array#x, false) AS size(date_array)#x, size(timestamp_array#x, false) AS size(timestamp_array)#x]
+- SubqueryAlias primitive_arrays
+- View (`primitive_arrays`, [boolean_array#x,tinyint_array#x,smallint_array#x,int_array#x,bigint_array#x,decimal_array#x,double_array#x,float_array#x,date_array#x,timestamp_array#x])
+- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x]
Expand All @@ -247,70 +241,70 @@ Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_arra
-- !query
select element_at(array(1, 2, 3), 5)
-- !query analysis
Project [element_at(array(1, 2, 3), 5, None, false) AS element_at(array(1, 2, 3), 5)#x]
Project [element_at(array(1, 2, 3), 5, None, true) AS element_at(array(1, 2, 3), 5)#x]
+- OneRowRelation


-- !query
select element_at(array(1, 2, 3), -5)
-- !query analysis
Project [element_at(array(1, 2, 3), -5, None, false) AS element_at(array(1, 2, 3), -5)#x]
Project [element_at(array(1, 2, 3), -5, None, true) AS element_at(array(1, 2, 3), -5)#x]
+- OneRowRelation


-- !query
select element_at(array(1, 2, 3), 0)
-- !query analysis
Project [element_at(array(1, 2, 3), 0, None, false) AS element_at(array(1, 2, 3), 0)#x]
Project [element_at(array(1, 2, 3), 0, None, true) AS element_at(array(1, 2, 3), 0)#x]
+- OneRowRelation


-- !query
select elt(4, '123', '456')
-- !query analysis
Project [elt(4, 123, 456, false) AS elt(4, 123, 456)#x]
Project [elt(4, 123, 456, true) AS elt(4, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(0, '123', '456')
-- !query analysis
Project [elt(0, 123, 456, false) AS elt(0, 123, 456)#x]
Project [elt(0, 123, 456, true) AS elt(0, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(-1, '123', '456')
-- !query analysis
Project [elt(-1, 123, 456, false) AS elt(-1, 123, 456)#x]
Project [elt(-1, 123, 456, true) AS elt(-1, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(null, '123', '456')
-- !query analysis
Project [elt(cast(null as int), 123, 456, false) AS elt(NULL, 123, 456)#x]
Project [elt(cast(null as int), 123, 456, true) AS elt(NULL, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(null, '123', null)
-- !query analysis
Project [elt(cast(null as int), 123, cast(null as string), false) AS elt(NULL, 123, NULL)#x]
Project [elt(cast(null as int), 123, cast(null as string), true) AS elt(NULL, 123, NULL)#x]
+- OneRowRelation


-- !query
select elt(1, '123', null)
-- !query analysis
Project [elt(1, 123, cast(null as string), false) AS elt(1, 123, NULL)#x]
Project [elt(1, 123, cast(null as string), true) AS elt(1, 123, NULL)#x]
+- OneRowRelation


-- !query
select elt(2, '123', null)
-- !query analysis
Project [elt(2, 123, cast(null as string), false) AS elt(2, 123, NULL)#x]
Project [elt(2, 123, cast(null as string), true) AS elt(2, 123, NULL)#x]
+- OneRowRelation


Expand Down Expand Up @@ -383,21 +377,21 @@ org.apache.spark.sql.AnalysisException
-- !query
select size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))
-- !query analysis
Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), true) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), false) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
+- OneRowRelation


-- !query
select size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))
-- !query analysis
Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), false) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
+- OneRowRelation


-- !query
select size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10)))
-- !query analysis
Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x]
Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), false) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x]
+- OneRowRelation


Expand Down
Loading

0 comments on commit 34c624e

Please sign in to comment.