From 683c52215ea1549b428d71042b035e75ae49387d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 5 Nov 2015 09:48:34 +0800 Subject: [PATCH 1/3] cleanup --- .../sql/catalyst/AbstractSparkSQLParser.scala | 17 +++++++-------- .../apache/spark/sql/catalyst/SqlParser.scala | 17 +++------------ .../spark/sql/catalyst/SqlParserSuite.scala | 21 +++++++++++++++++++ 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index 04ac4f20c66ec..ac39dcdab86a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -82,10 +82,6 @@ class SqlLexical extends StdLexical { override def toString: String = chars } - case class DecimalLit(chars: String) extends Token { - override def toString: String = chars - } - /* This is a work around to support the lazy setting */ def initialize(keywords: Seq[String]): Unit = { reserved.clear() @@ -106,11 +102,10 @@ class SqlLexical extends StdLexical { } override lazy val token: Parser[Token] = - ( rep1(digit) ~ ('.' ~> digit.*).? ~ (exp ~> sign.? ~ rep1(digit)) ^^ { - case i ~ None ~ (sig ~ rest) => - DecimalLit(i.mkString + "e" + sig.mkString + rest.mkString) - case i ~ Some(d) ~ (sig ~ rest) => - DecimalLit(i.mkString + "." + d.mkString + "e" + sig.mkString + rest.mkString) + ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => FloatLit(i.mkString + s) } + | '.' ~> (rep1(digit) ~ scientificNotation) ^^ { case i ~ s => FloatLit("0." + i.mkString + s) } + | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^ { + case i1 ~ i2 ~ s => FloatLit(i1.mkString + "." + i2.mkString + s) } | digit.* ~ identChar ~ (identChar | digit).* ^^ { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) } @@ -133,6 +128,10 @@ class SqlLexical extends StdLexical { override def identChar: Parser[Elem] = letter | elem('_') + private lazy val scientificNotation: Parser[String] = + exp ~> sign.? ~ rep1(digit) ^^ { + case s ~ rest => "e" + s.mkString + rest.mkString + } private lazy val sign: Parser[Elem] = elem("s", c => c == '+' || c == '-') private lazy val exp: Parser[Elem] = elem("e", c => c == 'E' || c == 'e') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 440e9e28fa783..ebcc168a16bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -335,11 +335,8 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val numericLiteral: Parser[Literal] = ( integral ^^ { case i => Literal(toNarrowestIntegerType(i)) } | sign.? ~ unsignedFloat ^^ { - case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f)) - } - | sign.? ~ unsignedDecimal ^^ { - case s ~ d => Literal(toDecimalOrDouble(s.getOrElse("") + d)) - } + case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f)) + } ) protected lazy val unsignedFloat: Parser[String] = @@ -347,14 +344,6 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { | elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars) ) - protected lazy val unsignedDecimal: Parser[String] = - ( "." ~> decimalLit ^^ { u => "0." + u } - | elem("scientific_notation", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars) - ) - - def decimalLit: Parser[String] = - elem("scientific_notation", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars) - protected lazy val sign: Parser[String] = ("+" | "-") protected lazy val integral: Parser[String] = @@ -477,7 +466,7 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val baseExpression: Parser[Expression] = ( "*" ^^^ UnresolvedStar(None) - | (ident <~ "."). + <~ "*" ^^ { case target => UnresolvedStar(Option(target))} + | rep1(ident <~ ".") <~ "*" ^^ { case target => UnresolvedStar(Option(target))} | primary ) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala index ea28bfa021bed..9ff893b84775b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala @@ -126,4 +126,25 @@ class SqlParserSuite extends PlanTest { checkSingleUnit("13.123456789", "second") checkSingleUnit("-13.123456789", "second") } + + test("support scientific notation") { + def assertRight(input: String, output: Double): Unit = { + val parsed = SqlParser.parse("SELECT " + input) + val expected = Project( + UnresolvedAlias( + Literal(output) + ) :: Nil, + OneRowRelation) + comparePlans(parsed, expected) + } + + assertRight("9.0e1", 90) + assertRight(".9e+2", 90) + assertRight("0.9e+2", 90) + assertRight("900e-1", 90) + assertRight("900.0E-1", 90) + assertRight("9.e+1", 90) + + intercept[RuntimeException](SqlParser.parse("SELECT .e3")) + } } From ed981d27f4bfb2dc6b8399e713004d8ac00a9ccf Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 6 Nov 2015 10:58:46 +0800 Subject: [PATCH 2/3] rename to DecimalLit --- .../sql/catalyst/AbstractSparkSQLParser.scala | 14 ++++++-------- .../org/apache/spark/sql/catalyst/SqlParser.scala | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index ac39dcdab86a8..7d80699f0971f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -78,7 +78,7 @@ private[sql] abstract class AbstractSparkSQLParser } class SqlLexical extends StdLexical { - case class FloatLit(chars: String) extends Token { + case class DecimalLit(chars: String) extends Token { override def toString: String = chars } @@ -102,16 +102,16 @@ class SqlLexical extends StdLexical { } override lazy val token: Parser[Token] = - ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => FloatLit(i.mkString + s) } - | '.' ~> (rep1(digit) ~ scientificNotation) ^^ { case i ~ s => FloatLit("0." + i.mkString + s) } + ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) } + | '.' ~> (rep1(digit) ~ scientificNotation) ^^ { case i ~ s => DecimalLit("0." + i.mkString + s) } | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^ { - case i1 ~ i2 ~ s => FloatLit(i1.mkString + "." + i2.mkString + s) + case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) } | digit.* ~ identChar ~ (identChar | digit).* ^^ { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) } | rep1(digit) ~ ('.' ~> digit.*).? ^^ { case i ~ None => NumericLit(i.mkString) - case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString) + case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString) } | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^ { case chars => StringLit(chars mkString "") } @@ -129,11 +129,9 @@ class SqlLexical extends StdLexical { override def identChar: Parser[Elem] = letter | elem('_') private lazy val scientificNotation: Parser[String] = - exp ~> sign.? ~ rep1(digit) ^^ { + (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ { case s ~ rest => "e" + s.mkString + rest.mkString } - private lazy val sign: Parser[Elem] = elem("s", c => c == '+' || c == '-') - private lazy val exp: Parser[Elem] = elem("e", c => c == 'E' || c == 'e') override def whitespace: Parser[Any] = ( whitespaceChar diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index ebcc168a16bab..415c516026826 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -341,7 +341,7 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val unsignedFloat: Parser[String] = ( "." ~> numericLit ^^ { u => "0." + u } - | elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars) + | elem("decimal", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars) ) protected lazy val sign: Parser[String] = ("+" | "-") From 3d583cd7145a8f1a438277f8cb95347638e0452d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 6 Nov 2015 11:49:41 +0800 Subject: [PATCH 3/3] fix style --- .../spark/sql/catalyst/AbstractSparkSQLParser.scala | 8 ++++---- .../scala/org/apache/spark/sql/catalyst/SqlParser.scala | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index 7d80699f0971f..bdc52c08acb66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -103,10 +103,10 @@ class SqlLexical extends StdLexical { override lazy val token: Parser[Token] = ( rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) } - | '.' ~> (rep1(digit) ~ scientificNotation) ^^ { case i ~ s => DecimalLit("0." + i.mkString + s) } - | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^ { - case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) - } + | '.' ~> (rep1(digit) ~ scientificNotation) ^^ + { case i ~ s => DecimalLit("0." + i.mkString + s) } + | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^ + { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) } | digit.* ~ identChar ~ (identChar | digit).* ^^ { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) } | rep1(digit) ~ ('.' ~> digit.*).? ^^ { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 415c516026826..cd717c09f8e5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -334,9 +334,8 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val numericLiteral: Parser[Literal] = ( integral ^^ { case i => Literal(toNarrowestIntegerType(i)) } - | sign.? ~ unsignedFloat ^^ { - case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f)) - } + | sign.? ~ unsignedFloat ^^ + { case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f)) } ) protected lazy val unsignedFloat: Parser[String] =