[SPARK-38761][SQL] DS V2 supports push down misc non-aggregate functions #36039

Closed
wants to merge 2 commits into from
@@ -93,6 +93,13 @@ public String build(Expression expr) {
return visitNot(build(e.children()[0]));
case "~":
return visitUnaryArithmetic(name, inputToSQL(e.children()[0]));
case "ABS":
return visitAbs(build(e.children()[0]));
case "COALESCE": {
List<String> children =
Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList());
return visitCoalesce(children);
}
case "CASE_WHEN": {
List<String> children =
Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList());
@@ -188,6 +195,14 @@ protected String visitNot(String v) {
return "NOT (" + v + ")";
}

protected String visitAbs(String v) {
Contributor:

It looks like overkill to add one visit method for each function. How about just one:

String visitSQLFunction(String funcName, String[] inputs) ...

(a rough sketch of this suggestion follows the Java diff below)

return "ABS(" + v + ")";
}

protected String visitCoalesce(List<String> list) {
return "COALESCE(" + list.stream().collect(Collectors.joining(", ")) + ")";
}

protected String visitUnaryArithmetic(String name, String v) { return name + v; }

protected String visitCaseWhen(String[] children) {
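A minimal sketch of the generic helper suggested in the review comment above; the method name and signature come from the comment, while the body and the collapsed switch cases are illustrative assumptions, not code from this PR (it presumes the existing class context, including the Arrays import already used by build):

// Hypothetical generic replacement for the per-function visit methods (sketch only).
protected String visitSQLFunction(String funcName, String[] inputs) {
  return funcName + "(" + String.join(", ", inputs) + ")";
}

// The ABS and COALESCE cases in build(Expression expr) could then share it, e.g.:
// case "ABS":
// case "COALESCE":
//   return visitSQLFunction(name,
//       Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new));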
@@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.util

import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus}
import org.apache.spark.sql.catalyst.expressions.{Abs, Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Coalesce, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus}
import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue}
import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate}
import org.apache.spark.sql.execution.datasources.PushableColumn
@@ -95,6 +95,15 @@ class V2ExpressionBuilder(
}
case Cast(child, dataType, _, true) =>
generateExpression(child).map(v => new V2Cast(v, dataType))
case Abs(child, true) => generateExpression(child)
.map(v => new GeneralScalarExpression("ABS", Array[V2Expression](v)))
case Coalesce(children) =>
val childrenExpressions = children.flatMap(generateExpression(_))
if (children.length == childrenExpressions.length) {
Some(new GeneralScalarExpression("COALESCE", childrenExpressions.toArray[V2Expression]))
} else {
None
}
case and: And =>
// AND expects predicate
val l = generateExpression(and.left, true)
50 changes: 26 additions & 24 deletions sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort}
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper}
import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
import org.apache.spark.sql.functions.{avg, count, count_distinct, lit, not, sum, udf, when}
import org.apache.spark.sql.functions.{abs, avg, coalesce, count, count_distinct, lit, not, sum, udf, when}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.util.Utils
@@ -381,19 +381,13 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
checkAnswer(df, Seq(Row("fred", 1), Row("mary", 2)))

val df2 = spark.table("h2.test.people").filter($"id" + Int.MaxValue > 1)

checkFiltersRemoved(df2, ansiMode)

df2.queryExecution.optimizedPlan.collect {
case _: DataSourceV2ScanRelation =>
val expected_plan_fragment = if (ansiMode) {
"PushedFilters: [ID IS NOT NULL, (ID + 2147483647) > 1], "
} else {
"PushedFilters: [ID IS NOT NULL], "
}
checkKeywordsExistsInExplain(df2, expected_plan_fragment)
val expectedPlanFragment2 = if (ansiMode) {
"PushedFilters: [ID IS NOT NULL, (ID + 2147483647) > 1], "
} else {
"PushedFilters: [ID IS NOT NULL], "
}

checkPushedInfo(df2, expectedPlanFragment2)
if (ansiMode) {
val e = intercept[SparkException] {
checkAnswer(df2, Seq.empty)
@@ -422,22 +416,30 @@

val df4 = spark.table("h2.test.employee")
.filter(($"salary" > 1000d).and($"salary" < 12000d))

checkFiltersRemoved(df4, ansiMode)

df4.queryExecution.optimizedPlan.collect {
case _: DataSourceV2ScanRelation =>
val expected_plan_fragment = if (ansiMode) {
"PushedFilters: [SALARY IS NOT NULL, " +
"CAST(SALARY AS double) > 1000.0, CAST(SALARY AS double) < 12000.0], "
} else {
"PushedFilters: [SALARY IS NOT NULL], "
}
checkKeywordsExistsInExplain(df4, expected_plan_fragment)
val expectedPlanFragment4 = if (ansiMode) {
"PushedFilters: [SALARY IS NOT NULL, " +
"CAST(SALARY AS double) > 1000.0, CAST(SALARY AS double) < 12000.0], "
} else {
"PushedFilters: [SALARY IS NOT NULL], "
}

checkPushedInfo(df4, expectedPlanFragment4)
checkAnswer(df4, Seq(Row(1, "amy", 10000, 1000, true),
Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true)))

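// Verifies that abs() and coalesce() in filters are pushed down (fully only under ANSI mode).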
val df5 = spark.table("h2.test.employee")
.filter(abs($"dept" - 3) > 1)
.filter(coalesce($"salary", $"bonus") > 2000)
checkFiltersRemoved(df5, ansiMode)
val expectedPlanFragment5 = if (ansiMode) {
"PushedFilters: [DEPT IS NOT NULL, ABS(DEPT - 3) > 1, " +
"(COALESCE(CAST(SALARY AS double), BONUS)) > 2000.0]"
} else {
"PushedFilters: [DEPT IS NOT NULL]"
}
checkPushedInfo(df5, expectedPlanFragment5)
checkAnswer(df5, Seq(Row(1, "amy", 10000, 1000, true),
Row(1, "cathy", 9000, 1200, false), Row(6, "jen", 12000, 1200, true)))
}
}
}