-
Notifications
You must be signed in to change notification settings - Fork 28.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-28083][SQL] Support LIKE ... ESCAPE syntax #25001
Changes from 60 commits
4f5016a
8609a46
8992b5a
c9d7dfe
b5be74a
6ac3f21
5303564
aef7dcb
77f2d98
caef102
a0ceae1
28a566d
f360f43
be0c1d9
aa81a56
4b1fed8
3e3f2b6
9509c73
0bcb027
b5a3cc7
56f5f8a
64963ca
7fd5a7f
0650ef8
479e24a
103203f
5990545
31f1c7c
1c0440e
dbc4388
8a89f92
410dd85
33c7ad4
a57e25c
99bbc2f
bcda7d1
9901924
f5490c6
7f6e3d1
856242c
3d01650
420f1b9
2b4c59a
9f7707b
24a796e
6b60360
aa6c785
aa0f2f7
bb0be67
10f42f4
738ca18
134fd1f
24c4be7
3964414
88a4e3e
f539524
a891139
fd8c5a7
0a51849
64e49b7
9feb25d
4cc9e0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,8 +70,8 @@ abstract class StringRegexExpression extends BinaryExpression | |
* Simple RegEx pattern matching function | ||
*/ | ||
@ExpressionDescription( | ||
usage = "str _FUNC_ pattern - Returns true if str matches pattern, " + | ||
"null if any arguments are null, false otherwise.", | ||
usage = "str _FUNC_ pattern[ ESCAPE escape] - Returns true if str matches `pattern` with " + | ||
"`escape`, null if any arguments are null, false otherwise.", | ||
arguments = """ | ||
Arguments: | ||
* str - a string expression | ||
|
@@ -83,16 +83,15 @@ abstract class StringRegexExpression extends BinaryExpression | |
% matches zero or more characters in the input (similar to .* in posix regular | ||
expressions) | ||
|
||
The escape character is '\'. If an escape character precedes a special symbol or another | ||
escape character, the following character is matched literally. It is invalid to escape | ||
any other character. | ||
|
||
Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order | ||
to match "\abc", the pattern should be "\\abc". | ||
|
||
When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks | ||
to Spark 1.6 behavior regarding string literal parsing. For example, if the config is | ||
enabled, the pattern to match "\abc" should be "\abc". | ||
* escape - an optional string added since Spark 3.0. The default escape character is the '\'. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please update the comment as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK |
||
If an escape character precedes a special symbol or another escape character, the | ||
following character is matched literally. It is invalid to escape any other character. | ||
""", | ||
examples = """ | ||
Examples: | ||
|
@@ -104,19 +103,22 @@ abstract class StringRegexExpression extends BinaryExpression | |
spark.sql.parser.escapedStringLiterals false | ||
> SELECT '%SystemDrive%\\Users\\John' _FUNC_ '\%SystemDrive\%\\\\Users%'; | ||
true | ||
> SELECT '%SystemDrive%/Users/John' _FUNC_ '/%SystemDrive/%//Users%' ESCAPE '/'; | ||
true | ||
""", | ||
note = """ | ||
Use RLIKE to match with standard regular expressions. | ||
""", | ||
since = "1.0.0") | ||
// scalastyle:on line.contains.tab | ||
case class Like(left: Expression, right: Expression) extends StringRegexExpression { | ||
case class Like(left: Expression, right: Expression, escapeChar: Char = '\\') | ||
extends StringRegexExpression { | ||
|
||
override def escape(v: String): String = StringUtils.escapeLikeRegex(v) | ||
override def escape(v: String): String = StringUtils.escapeLikeRegex(v, escapeChar) | ||
|
||
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches() | ||
|
||
override def toString: String = s"$left LIKE $right" | ||
override def toString: String = s"$left LIKE $right ESCAPE '$escapeChar'" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: we can skip printing There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK |
||
|
||
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
val patternClass = classOf[Pattern].getName | ||
|
@@ -149,10 +151,18 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi | |
} else { | ||
val pattern = ctx.freshName("pattern") | ||
val rightStr = ctx.freshName("rightStr") | ||
// We need double escape to avoid org.codehaus.commons.compiler.CompileException. | ||
// '\\' will cause exception 'Single quote must be backslash-escaped in character literal'. | ||
// '\"' will cause exception 'Line break in literal not allowed'. | ||
val newEscapeChar = if (escapeChar == '\"' || escapeChar == '\\') { | ||
s"""\\\\\\$escapeChar""" | ||
} else { | ||
escapeChar | ||
} | ||
nullSafeCodeGen(ctx, ev, (eval1, eval2) => { | ||
s""" | ||
String $rightStr = $eval2.toString(); | ||
$patternClass $pattern = $patternClass.compile($escapeFunc($rightStr)); | ||
$patternClass $pattern = $patternClass.compile($escapeFunc($rightStr, '$newEscapeChar')); | ||
${ev.value} = $pattern.matcher($eval1.toString()).matches(); | ||
""" | ||
}) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -484,7 +484,7 @@ object LikeSimplification extends Rule[LogicalPlan] { | |
private val equalTo = "([^_%]*)".r | ||
|
||
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { | ||
case Like(input, Literal(pattern, StringType)) => | ||
case Like(input, Literal(pattern, StringType), opt) => | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. opt => escapeChar There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK |
||
if (pattern == null) { | ||
// If pattern is null, return null value directly, since "col like null" == null. | ||
Literal(null, BooleanType) | ||
|
@@ -503,8 +503,7 @@ object LikeSimplification extends Rule[LogicalPlan] { | |
Contains(input, Literal(infix)) | ||
case equalTo(str) => | ||
EqualTo(input, Literal(str)) | ||
case _ => | ||
Like(input, Literal.create(pattern, StringType)) | ||
case _ => Like(input, Literal.create(pattern, StringType), opt) | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this doesn't apply to RLIKE?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.