Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-29237][SQL] Prevent real function names in expression example template #25924

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.catalyst.expressions;

import com.google.common.annotations.VisibleForTesting;

/**
* Expression information, will be used to describe a expression.
*/
Expand Down Expand Up @@ -56,6 +58,11 @@ public String getArguments() {
return arguments;
}

@VisibleForTesting
public String getOriginalExamples() {
return examples;
}

public String getExamples() {
return replaceFunctionName(examples);
}
Expand Down
Expand Up @@ -1097,7 +1097,7 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S
usage = "_FUNC_(timestamp, timezone) - Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders that time as a timestamp in the given time zone. For example, 'GMT+1' would yield '2017-07-14 03:40:00.0'.",
examples = """
Examples:
> SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');
> SELECT _FUNC_('2016-08-31', 'Asia/Seoul');
2016-08-31 09:00:00
""",
since = "1.5.0",
Expand Down
24 changes: 24 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Expand Up @@ -115,6 +115,30 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
}
}

test("using _FUNC_ instead of function names in examples") {
val exampleRe = "(>.*;)".r
val ignoreSet = Set(
// Examples for CaseWhen show simpler syntax:
// `CASE WHEN ... THEN ... WHEN ... THEN ... END`
"org.apache.spark.sql.catalyst.expressions.CaseWhen",
// _FUNC_ is replaced by `locate` but `locate(... IN ...)` is not supported
"org.apache.spark.sql.catalyst.expressions.StringLocate",
// _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)`
"org.apache.spark.sql.catalyst.expressions.Remainder",
// Examples demonstrate alternative names, see SPARK-20749
"org.apache.spark.sql.catalyst.expressions.Length")
spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, @MaxGekk . This is super useful.

val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
val className = info.getClassName
withClue(s"Expression class '$className'") {
val exprExamples = info.getOriginalExamples
if (!exprExamples.isEmpty && !ignoreSet.contains(className)) {
assert(exampleRe.findAllIn(exprExamples).toSet.forall(_.contains("_FUNC_")))
}
}
}
}

test("SPARK-6743: no columns from cache") {
Seq(
(83, 0, 38),
Expand Down