From 21db2f86f7c196c535e8f0b5675ae48cb2c372f7 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 25 Sep 2019 15:16:00 -0700
Subject: [PATCH] [SPARK-29237][SQL] Prevent real function names in expression example template

### What changes were proposed in this pull request?

In the PR, I propose to replace function names in some expression examples by `_FUNC_`, and to add a test which checks that `_FUNC_` is always present in all examples.

### Why are the changes needed?

Binding of a function name to an expression is performed in `FunctionRegistry`, which is the single source of truth. Expression examples should avoid using function names directly because this can make the examples invalid in the future.

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Added a new test to `SQLQuerySuite` which analyses expression examples and checks the presence of `_FUNC_`.

Closes #25924 from MaxGekk/fix-func-examples.

Authored-by: Maxim Gekk
Signed-off-by: Dongjoon Hyun
---
 .../catalyst/expressions/ExpressionInfo.java  |  7 ++++++
 .../expressions/datetimeExpressions.scala     |  2 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 24 +++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index 769cf36c3df3f..8ee90ed6f4c3b 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.expressions;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Expression information, will be used to describe a expression.
  */
@@ -56,6 +58,11 @@ public String getArguments() {
         return arguments;
     }
 
+    @VisibleForTesting
+    public String getOriginalExamples() {
+        return examples;
+    }
+
     public String getExamples() {
         return replaceFunctionName(examples);
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 0098226b3258c..2543828b6315f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1097,7 +1097,7 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S
   usage = "_FUNC_(timestamp, timezone) - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'.",
   examples = """
     Examples:
-      > SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');
+      > SELECT _FUNC_('2016-08-31', 'Asia/Seoul');
       2016-08-31 09:00:00
   """,
   since = "1.5.0",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 28a027690db04..2ec668cd5d5de 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -115,6 +115,30 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
     }
   }
 
+  test("using _FUNC_ instead of function names in examples") {
+    val exampleRe = "(>.*;)".r
+    val ignoreSet = Set(
+      // Examples for CaseWhen show simpler syntax:
+      // `CASE WHEN ... THEN ... WHEN ... THEN ... END`
+      "org.apache.spark.sql.catalyst.expressions.CaseWhen",
+      // _FUNC_ is replaced by `locate` but `locate(... IN ...)` is not supported
+      "org.apache.spark.sql.catalyst.expressions.StringLocate",
+      // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)`
+      "org.apache.spark.sql.catalyst.expressions.Remainder",
+      // Examples demonstrate alternative names, see SPARK-20749
+      "org.apache.spark.sql.catalyst.expressions.Length")
+    spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
+      val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
+      val className = info.getClassName
+      withClue(s"Expression class '$className'") {
+        val exprExamples = info.getOriginalExamples
+        if (!exprExamples.isEmpty && !ignoreSet.contains(className)) {
+          assert(exampleRe.findAllIn(exprExamples).toSet.forall(_.contains("_FUNC_")))
+        }
+      }
+    }
+  }
+
   test("SPARK-6743: no columns from cache") {
     Seq(
       (83, 0, 38),
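The heart of the new check is a regex scan over each expression's raw example text: every SQL example line (`> ... ;`) must use the `_FUNC_` placeholder rather than a concrete function name, since `getExamples()` substitutes the registered name via `replaceFunctionName` at lookup time. A minimal, self-contained sketch of that check outside the test suite (the object name and the sample strings below are hypothetical, not part of the patch) might look like:

```scala
object FuncPlaceholderCheck {
  // Each SQL example line in an ExpressionDescription looks like "> SELECT ...;".
  private val exampleRe = "(>.*;)".r

  // True when every example invocation uses the _FUNC_ placeholder
  // instead of a hard-coded function name.
  def usesPlaceholder(examples: String): Boolean =
    exampleRe.findAllIn(examples).forall(_.contains("_FUNC_"))

  def main(args: Array[String]): Unit = {
    // Hypothetical example strings, mirroring the style of ExpressionDescription.examples.
    val good =
      """
        Examples:
          > SELECT _FUNC_('2016-08-31', 'Asia/Seoul');
           2016-08-31 09:00:00
      """
    val bad = good.replace("_FUNC_", "from_utc_timestamp")

    println(usesPlaceholder(good)) // true
    println(usesPlaceholder(bad))  // false
  }
}
```

Keeping only `_FUNC_` in the raw examples means that renaming or aliasing a function in `FunctionRegistry` cannot leave a stale name behind in the generated documentation.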