Skip to content

Commit

Permalink
Address comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed May 10, 2017
1 parent 04a9fd3 commit 9ce7eb0
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -428,99 +428,76 @@ class ExpressionParserSuite extends PlanTest {
}

test("strings") {
// The SQL commands when ESCAPED_STRING_LITERALS = false (default behavior)
val sqlCommands = Seq(
// Single Strings.
"\"hello\"",
"'hello'",
// Multi-Strings.
"\"hello\" 'world'",
"'hello' \" \" 'world'",
// 'LIKE' string literals.
"'pattern%'",
"'no-pattern\\%'",
"'pattern\\\\%'",
"'pattern\\\\\\%'",
// Escaped characters.
"'\\0'",
"'\\\"'",
"'\\b'",
"'\\n'",
"'\\r'",
"'\\t'",
// Octals
"'\\110\\145\\154\\154\\157\\041'",
// Unicode
"'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'")

// The SQL commands when ESCAPED_STRING_LITERALS = true
val fallbackSqlCommands = Seq(
// Single Strings.
"\"hello\"",
"'hello'",
// Multi-Strings.
"\"hello\" 'world'",
"'hello' \" \" 'world'",
// 'LIKE' string literals.
"'pattern%'",
"'no-pattern\\%'",
"'pattern\\%'",
"'pattern\\\\%'",
// Escaped characters.
"'\0'",
"'\"'",
"'\b'",
"'\n'",
"'\r'",
"'\t'",
// Octals
"'\110\145\154\154\157\041'",
// Unicode
"'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'")

val expectedResults = Seq(
Seq(true, false).foreach { escape =>
val conf = new SQLConf()
conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, escape.toString)
val parser = new CatalystSqlParser(conf)

// Tests that have the same result regardless of the conf.
// Single Strings.
"hello",
"hello",
assertEqual("\"hello\"", "hello", parser)
assertEqual("'hello'", "hello", parser)

// Multi-Strings.
"helloworld",
"hello world",
assertEqual("\"hello\" 'world'", "helloworld", parser)
assertEqual("'hello' \" \" 'world'", "hello world", parser)

// 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a
// regular '%'; to get the correct result you need to add another escaped '\'.
// TODO: figure out whether we should change the ParseUtils.unescapeSQLString method.
"pattern%",
"no-pattern\\%",
"pattern\\%",
"pattern\\\\%",
// Escaped characters.
// See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
"\u0000", // ASCII NUL (X'00')
"\"", // Double quote
"\b", // Backspace
"\n", // Newline
"\r", // Carriage return
"\t", // Tab character
// Octals
"Hello!",
// Unicode
"World :)")

val tests = Seq(("false", sqlCommands), ("true", fallbackSqlCommands))

tests.map { case (escapedStringLiterals, commands) =>
val conf = new SQLConf()
conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, escapedStringLiterals)
val parser = new CatalystSqlParser(conf)
commands.zip(expectedResults).foreach { case (sqlCommand, expected) =>
assertEqual(sqlCommand, expected, parser)
}
if (escapedStringLiterals == "false") {
assertEqual("'\\''", "\'", parser) // Single quote
assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows)
} else {
assertEqual("'pattern%'", "pattern%", parser)
assertEqual("'no-pattern\\%'", "no-pattern\\%", parser)

// Tests whose results differ depending on the conf.
if (escape) {
// When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing falls back to
// Spark 1.6 behavior.

// 'LIKE' string literals.
assertEqual("'pattern\\\\%'", "pattern\\\\%", parser)
assertEqual("'pattern\\\\\\%'", "pattern\\\\\\%", parser)

// Escaped characters.
assertEqual("'\0'", "\u0000", parser) // ASCII NUL (X'00')

// Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled.
val e = intercept[ParseException](parser.parseExpression("'\''"))
assert(e.message.contains("extraneous input '''"))

assertEqual("'\"'", "\"", parser) // Double quote
assertEqual("'\b'", "\b", parser) // Backspace
assertEqual("'\n'", "\n", parser) // Newline
assertEqual("'\r'", "\r", parser) // Carriage return
assertEqual("'\t'", "\t", parser) // Tab character

// Octals
assertEqual("'\110\145\154\154\157\041'", "Hello!", parser)
// Unicode
assertEqual("'\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'", "World :)", parser)
} else {
// Default behavior

// 'LIKE' string literals.
assertEqual("'pattern\\\\%'", "pattern\\%", parser)
assertEqual("'pattern\\\\\\%'", "pattern\\\\%", parser)

// Escaped characters.
// See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
assertEqual("'\\0'", "\u0000", parser) // ASCII NUL (X'00')
assertEqual("'\\''", "\'", parser) // Single quote
assertEqual("'\\\"'", "\"", parser) // Double quote
assertEqual("'\\b'", "\b", parser) // Backspace
assertEqual("'\\n'", "\n", parser) // Newline
assertEqual("'\\r'", "\r", parser) // Carriage return
assertEqual("'\\t'", "\t", parser) // Tab character
assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows)

// Octals
assertEqual("'\\110\\145\\154\\154\\157\\041'", "Hello!", parser)

// Unicode
assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)",
parser)
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
checkDataset(ds, WithMapInOption(Some(Map(1 -> 1))))
}

test("do not unescaped regex pattern string") {
test("SPARK-20399: do not unescaped regex pattern when ESCAPED_STRING_LITERALS is enabled") {
withSQLConf(SQLConf.ESCAPED_STRING_LITERALS.key -> "true") {
val data = Seq("\u0020\u0021\u0023", "abc")
val df = data.toDF()
Expand Down

0 comments on commit 9ce7eb0

Please sign in to comment.