Skip to content

Commit

Permalink
Merged ParserUtils and ParseUtils
Browse files Browse the repository at this point in the history
  • Loading branch information
sarutak committed Apr 6, 2016
1 parent 1146c53 commit e54bcc6
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 136 deletions.

This file was deleted.

Expand Up @@ -16,11 +16,12 @@
*/
package org.apache.spark.sql.catalyst.parser

import scala.collection.mutable.StringBuilder

import org.antlr.v4.runtime.{CharStream, ParserRuleContext, Token}
import org.antlr.v4.runtime.misc.Interval
import org.antlr.v4.runtime.tree.TerminalNode

import org.apache.spark.sql.catalyst.parser.ParseUtils.unescapeSQLString
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}

Expand Down Expand Up @@ -87,6 +88,81 @@ object ParserUtils {
}
}

/**
 * Unescape a backslash-escaped string enclosed by quotes.
 *
 * Only text between a matching pair of single or double quotes is kept; the quotes themselves
 * and any characters outside of them are dropped. Inside the quotes these escapes are decoded:
 *  - `\u0000` style: a backslash, the letter u and four hexadecimal digits become one UTF-16
 *    code unit (a surrogate pair is written as two such escapes);
 *  - `\000` style: a backslash and three octal digits, the first of which must be 0 or 1;
 *  - single-character escapes, see `appendEscapedChar` below.
 *
 * A backslash-u or backslash-digit sequence that is not well formed is not decoded
 * numerically; its first character goes through the single-character escape rules and the
 * remaining characters are copied as ordinary text.
 *
 * @param b the quoted literal, including its enclosing quotes
 * @return the unescaped contents of the literal
 */
def unescapeSQLString(b: String): String = {
  val sb = new StringBuilder(b.length())
  // Quote character that opened the literal currently being read; None while outside quotes.
  var enclosure: Option[Char] = None

  /** Append the character that the single-character escape `n` stands for. */
  def appendEscapedChar(n: Char): Unit = {
    n match {
      case '0' => sb.append('\u0000')
      case '\'' => sb.append('\'')
      case '"' => sb.append('\"')
      case 'b' => sb.append('\b')
      case 'n' => sb.append('\n')
      case 'r' => sb.append('\r')
      case 't' => sb.append('\t')
      case 'Z' => sb.append('\u001A')
      case '\\' => sb.append('\\')
      // The following 2 lines are exactly what MySQL does TODO: why do we do this?
      case '%' => sb.append("\\%")
      case '_' => sb.append("\\_")
      // Any other character simply loses its backslash.
      case _ => sb.append(n)
    }
  }

  val strLength = b.length

  /** True when the four characters starting at `start` are all hexadecimal digits. */
  def isHexQuad(start: Int): Boolean =
    (start until start + 4).forall(j => Character.digit(b.charAt(j), 16) >= 0)

  var i = 0
  while (i < strLength) {
    val currentChar = b.charAt(i)
    if (enclosure.isEmpty) {
      // Outside a literal: only an opening quote is meaningful, everything else is skipped.
      if (currentChar == '\'' || currentChar == '\"') {
        enclosure = Some(currentChar)
      }
    } else if (enclosure == Some(currentChar)) {
      // Matching closing quote.
      enclosure = None
    } else if (currentChar == '\\') {
      // The length checks leave room for the escape body plus the closing quote, so the
      // charAt calls below stay in bounds.
      if ((i + 6 < strLength) && b.charAt(i + 1) == 'u' && isHexQuad(i + 2)) {
        // \u0000 style character literals. The digits were validated above; without that
        // check Character.digit would return -1 and corrupt the decoded code unit.
        val base = i + 2
        val code = (0 until 4).foldLeft(0) { (mid, j) =>
          (mid << 4) + Character.digit(b.charAt(j + base), 16)
        }
        sb.append(code.toChar)
        i += 5
      } else if (i + 4 < strLength) {
        // \000 style character literals.
        val i1 = b.charAt(i + 1)
        val i2 = b.charAt(i + 2)
        val i3 = b.charAt(i + 3)

        if ((i1 >= '0' && i1 <= '1') && (i2 >= '0' && i2 <= '7') && (i3 >= '0' && i3 <= '7')) {
          sb.append(((i3 - '0') + ((i2 - '0') << 3) + ((i1 - '0') << 6)).toChar)
          i += 3
        } else {
          // Not a valid octal triple (this is also where a malformed unicode escape ends up):
          // treat the next character as a plain single-character escape.
          appendEscapedChar(i1)
          i += 1
        }
      } else if (i + 2 < strLength) {
        // escaped character literals.
        appendEscapedChar(b.charAt(i + 1))
        i += 1
      }
      // Otherwise the backslash sits right before the end of the input and is dropped.
    } else {
      // non-escaped character literals.
      sb.append(currentChar)
    }
    i += 1
  }
  sb.toString()
}

/** Some syntactic sugar which makes it easier to work with optional clauses for LogicalPlans. */
implicit class EnhancedLogicalPlan(val plan: LogicalPlan) extends AnyVal {
/**
Expand Down
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite

/** Unit tests for the helper methods defined in `ParserUtils`. */
class ParserUtilsSuite extends SparkFunSuite {

  import ParserUtils._

  test("unescapeSQLString") {

    // String not including escaped characters and enclosed by double quotes.
    assert(unescapeSQLString(""""abcdefg"""") == "abcdefg")

    // String enclosed by single quotes.
    assert(unescapeSQLString("""'C0FFEE'""") == "C0FFEE")

    // Strings including single escaped characters.
    assert(unescapeSQLString("""'\0'""") == "\u0000")
    assert(unescapeSQLString(""""\'"""") == "\'")
    assert(unescapeSQLString("""'\"'""") == "\"")
    assert(unescapeSQLString(""""\b"""") == "\b")
    assert(unescapeSQLString("""'\n'""") == "\n")
    assert(unescapeSQLString(""""\r"""") == "\r")
    assert(unescapeSQLString("""'\t'""") == "\t")
    assert(unescapeSQLString(""""\Z"""") == "\u001A")
    assert(unescapeSQLString("""'\\'""") == "\\")
    assert(unescapeSQLString(""""\%"""") == "\\%")
    assert(unescapeSQLString("""'\_'""") == "\\_")

    // String including '\000' style literal characters.
    assert(unescapeSQLString("""'3 + 5 = \070'""") == "3 + 5 = \u0038")
    assert(unescapeSQLString(""""\000"""") == "\u0000")

    // String including invalid '\000' style literal characters.
    assert(unescapeSQLString(""""\256"""") == "256")

    // The unicode inputs below are ordinary string literals with an escaped backslash rather
    // than triple-quoted ones: older Scala compilers translate unicode escapes inside
    // triple-quoted literals at compile time, in which case the function under test would
    // receive the already-decoded character and its decoding branch would never run.

    // String including a '\u0000' style literal characters (\u732B is a cat in Kanji).
    assert(unescapeSQLString("\"How cute \\u732B are\"") == "How cute \u732B are")

    // String including a surrogate pair character
    // (\uD867\uDE3D is Okhotsk atka mackerel in Kanji).
    assert(unescapeSQLString("\"\\uD867\\uDE3D is a fish\"") == "\uD867\uDE3D is a fish")
  }

  // TODO: Add test cases for other methods in ParserUtils
}

0 comments on commit e54bcc6

Please sign in to comment.