From 59af3970ee7e0e293f012f0ffa89edbdf1793a9b Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Thu, 21 Aug 2014 19:05:53 +0200
Subject: [PATCH 1/7] Added a new TIMESTAMP keyword; CAST to TIMESTAMP now can be used in SQL expression.

---
 .../main/scala/org/apache/spark/sql/catalyst/SqlParser.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 2c73a80f64ebf..0afe750839338 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -114,6 +114,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val STRING = Keyword("STRING")
   protected val SUM = Keyword("SUM")
   protected val TABLE = Keyword("TABLE")
+  protected val TIMESTAMP = Keyword("TIMESTAMP")
   protected val TRUE = Keyword("TRUE")
   protected val UNCACHE = Keyword("UNCACHE")
   protected val UNION = Keyword("UNION")
@@ -357,7 +358,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     literal

   protected lazy val dataType: Parser[DataType] =
-    STRING ^^^ StringType
+    STRING ^^^ StringType | TIMESTAMP ^^^ TimestampType
 }

 class SqlLexical(val keywords: Seq[String]) extends StdLexical {

From 4446b1e627e1f046ba6ac9087acab603c33bc1b6 Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Wed, 27 Aug 2014 10:47:53 +0200
Subject: [PATCH 2/7] A string literal is casted into Timestamp when the column is Timestamp.

---
 .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 15eb5982a4a91..cd2b5777dc7d4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -218,11 +218,18 @@ trait HiveTypeCoercion {
       case a: BinaryArithmetic if a.right.dataType == StringType =>
         a.makeCopy(Array(a.left, Cast(a.right, DoubleType)))

+      case p: BinaryPredicate if p.left.dataType == TimestampType
+          && p.right.dataType == StringType =>
+        p.makeCopy(Array(p.left, Cast(p.right, TimestampType)))
+
       case p: BinaryPredicate if p.left.dataType == StringType && p.right.dataType != StringType =>
         p.makeCopy(Array(Cast(p.left, DoubleType), p.right))
       case p: BinaryPredicate if p.left.dataType != StringType && p.right.dataType == StringType =>
         p.makeCopy(Array(p.left, Cast(p.right, DoubleType)))

+      case i@In(a,b) if a.dataType == TimestampType && b.forall(_.dataType==StringType) =>
+        i.makeCopy(Array(a,b.map(Cast(_,TimestampType))))
+
       case Sum(e) if e.dataType == StringType =>
         Sum(Cast(e, DoubleType))
       case Average(e) if e.dataType == StringType =>
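The coercion rules added above only fire when the timestamp operand is on the left-hand side of the predicate; the symmetric "StringLiteral=TimestampColumn" order is handled later, in PATCH 5/7. Below is a minimal, self-contained sketch of the rewrite these rules perform; it uses toy expression types rather than Catalyst's classes, so every name in it is illustrative and not Spark API.

    // Toy model of the coercion idea: when a binary predicate compares a
    // timestamp-typed expression with a string-typed one, wrap the string
    // side in a Cast to TimestampType before evaluation.
    object CoercionSketch {
      sealed trait DataType
      case object StringType extends DataType
      case object TimestampType extends DataType
      case object BooleanType extends DataType

      sealed trait Expr { def dataType: DataType }
      case class Column(name: String, dataType: DataType) extends Expr
      case class Literal(value: String) extends Expr { val dataType = StringType }
      case class Cast(child: Expr, dataType: DataType) extends Expr
      case class EqualTo(left: Expr, right: Expr) extends Expr { val dataType = BooleanType }

      // Analogue of the BinaryPredicate case added in PATCH 2/7.
      def coerce(e: Expr): Expr = e match {
        case EqualTo(l, r) if l.dataType == TimestampType && r.dataType == StringType =>
          EqualTo(l, Cast(r, TimestampType))
        case other => other
      }

      def main(args: Array[String]): Unit = {
        val pred = EqualTo(Column("time", TimestampType), Literal("1970-01-01 00:00:00.001"))
        // Prints the predicate with the string literal wrapped in a Cast to TimestampType.
        println(coerce(pred))
      }
    }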
From 491dfcf8d2d835c87a351e8c8bab4043bd874049 Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Wed, 27 Aug 2014 10:49:31 +0200
Subject: [PATCH 3/7] Added test cases for SPARK-3173

---
 .../org/apache/spark/sql/SQLQuerySuite.scala | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 9b2a36d33fca7..08bf2dd8184cc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -49,6 +49,30 @@ class SQLQuerySuite extends QueryTest {
       "st")
   }

+  test("SPARK-3173 Timestamp support in the parser") {
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time=CAST('1970-01-01 01:00:00.001' AS TIMESTAMP)"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time='1970-01-01 01:00:00.001'"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001"))))
+
+    checkAnswer(sql(
+      """SELECT time FROM timestamps WHERE time<'1970-01-01 01:00:00.003'
+        AND time>'1970-01-01 01:00:00.001'"""),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.002"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time IN ('1970-01-01 01:00:00.001','1970-01-01 01:00:00.002')"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001")),
+        Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.002"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time='123'"),
+      Nil)
+  }
+
   test("index into array") {
     checkAnswer(
       sql("SELECT data, data[0], data[0] + data[1], data[0 + 1] FROM arrayData"),

From 96a661b9802a1af6990d0d474fe73dc455b75bce Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Thu, 28 Aug 2014 10:59:55 +0200
Subject: [PATCH 4/7] Code style change

---
 .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index cd2b5777dc7d4..5fb7879aeb8ac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -227,7 +227,7 @@ trait HiveTypeCoercion {
       case p: BinaryPredicate if p.left.dataType != StringType && p.right.dataType == StringType =>
         p.makeCopy(Array(p.left, Cast(p.right, DoubleType)))

-      case i@In(a,b) if a.dataType == TimestampType && b.forall(_.dataType==StringType) =>
+      case i @ In(a,b) if a.dataType == TimestampType && b.forall(_.dataType == StringType) =>
         i.makeCopy(Array(a,b.map(Cast(_,TimestampType))))

       case Sum(e) if e.dataType == StringType =>
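The queries in PATCH 3/7 run against a temp table named "timestamps" whose single TimestampType column "time" holds the first few milliseconds after the epoch; that fixture is not part of this diff (it lives in the suite's TestData). A hypothetical sketch of such a fixture follows; the object name, the case-class field, and the registration call are assumptions, not code from this patch. The string literals in the expected answers are parsed by java.sql.Timestamp.valueOf in the JVM default time zone, which is why PATCH 6/7 later pins the zone to UTC.

    // Hypothetical test fixture (not part of this patch): a "timestamps"
    // temp table with one TimestampType column "time" holding Timestamps
    // 1..3 ms after the epoch.
    import java.sql.Timestamp

    import org.apache.spark.sql.test.TestSQLContext
    import org.apache.spark.sql.test.TestSQLContext._  // implicit RDD-of-case-class -> SchemaRDD conversion

    object TimestampTestData {
      case class TimestampField(time: Timestamp)

      val timestamps = TestSQLContext.sparkContext.parallelize(
        (1 to 3).map(i => TimestampField(new Timestamp(i))))
      timestamps.registerTempTable("timestamps")
    }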
From 47b27b427c27018d92b7a2fdb68c397dbc7015c0 Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Thu, 28 Aug 2014 11:13:57 +0200
Subject: [PATCH 5/7] Now works in the case of "StringLiteral=TimestampColumn"

---
 .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 3 +++
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala   | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 5fb7879aeb8ac..e19c300b4769d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -218,6 +218,9 @@ trait HiveTypeCoercion {
       case a: BinaryArithmetic if a.right.dataType == StringType =>
         a.makeCopy(Array(a.left, Cast(a.right, DoubleType)))

+      case p: BinaryPredicate if p.left.dataType == StringType
+          && p.right.dataType == TimestampType =>
+        p.makeCopy(Array(Cast(p.left, TimestampType), p.right))
       case p: BinaryPredicate if p.left.dataType == TimestampType
           && p.right.dataType == StringType =>
         p.makeCopy(Array(p.left, Cast(p.right, TimestampType)))

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 08bf2dd8184cc..ff6a8cf81a18f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -58,6 +58,10 @@ class SQLQuerySuite extends QueryTest {
       "SELECT time FROM timestamps WHERE time='1970-01-01 01:00:00.001'"),
       Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001"))))

+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE '1970-01-01 01:00:00.001'=time"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001"))))
+
     checkAnswer(sql(
       """SELECT time FROM timestamps WHERE time<'1970-01-01 01:00:00.003'
         AND time>'1970-01-01 01:00:00.001'"""),
       Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.002"))))

From 65b62158bc3a8bede6e803fe11e2d7da3a260a0f Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Fri, 29 Aug 2014 10:47:29 +0200
Subject: [PATCH 6/7] Fixed timezone sensitivity in the test

---
 .../org/apache/spark/sql/SQLQuerySuite.scala | 35 +++++++++++++------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 08bf2dd8184cc..5c981c3fa4bfd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -19,15 +19,28 @@ package org.apache.spark.sql

 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.test._
+import org.scalatest.BeforeAndAfterAll
+import java.util.TimeZone

 /* Implicits */
 import TestSQLContext._
 import TestData._

-class SQLQuerySuite extends QueryTest {
+class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
   // Make sure the tables are loaded.
   TestData

+  var origZone: TimeZone = _
+  override protected def beforeAll() {
+    origZone = TimeZone.getDefault
+    TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
+  }
+
+  override protected def afterAll() {
+    TimeZone.setDefault(origZone)
+  }
+
   test("SPARK-2041 column name equals tablename") {
     checkAnswer(
       sql("SELECT tableName FROM tableName"),
@@ -51,22 +64,22 @@ class SQLQuerySuite extends QueryTest {

   test("SPARK-3173 Timestamp support in the parser") {
     checkAnswer(sql(
-      "SELECT time FROM timestamps WHERE time=CAST('1970-01-01 01:00:00.001' AS TIMESTAMP)"),
-      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001"))))
+      "SELECT time FROM timestamps WHERE time=CAST('1970-01-01 00:00:00.001' AS TIMESTAMP)"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001"))))

     checkAnswer(sql(
-      "SELECT time FROM timestamps WHERE time='1970-01-01 01:00:00.001'"),
-      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001"))))
+      "SELECT time FROM timestamps WHERE time='1970-01-01 00:00:00.001'"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001"))))

     checkAnswer(sql(
-      """SELECT time FROM timestamps WHERE time<'1970-01-01 01:00:00.003'
-        AND time>'1970-01-01 01:00:00.001'"""),
-      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.002"))))
+      """SELECT time FROM timestamps WHERE time<'1970-01-01 00:00:00.003'
+        AND time>'1970-01-01 00:00:00.001'"""),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.002"))))

     checkAnswer(sql(
-      "SELECT time FROM timestamps WHERE time IN ('1970-01-01 01:00:00.001','1970-01-01 01:00:00.002')"),
-      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.001")),
-        Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.002"))))
+      "SELECT time FROM timestamps WHERE time IN ('1970-01-01 00:00:00.001','1970-01-01 00:00:00.002')"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001")),
+        Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.002"))))

     checkAnswer(sql(
       "SELECT time FROM timestamps WHERE time='123'"),
       Nil)
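PATCH 6/7 pins the JVM default time zone because java.sql.Timestamp.valueOf interprets its argument in that default zone, so the original expected values (the '01:00:00.001' family) only matched the epoch-millisecond test data on a UTC+1 machine. A small standalone sketch of that sensitivity, using only JDK classes:

    // Demonstrates why the suite pins the default time zone to UTC:
    // Timestamp.valueOf parses its argument in the JVM default zone, so the
    // same string maps to different instants under different defaults.
    import java.sql.Timestamp
    import java.util.TimeZone

    object TimeZoneSensitivity {
      def main(args: Array[String]): Unit = {
        val original = TimeZone.getDefault
        try {
          TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
          // 1 ms after the epoch when parsed as UTC.
          println(Timestamp.valueOf("1970-01-01 00:00:00.001").getTime)   // 1
          TimeZone.setDefault(TimeZone.getTimeZone("Europe/Prague"))
          // Under UTC+1 the same string denotes an instant one hour earlier.
          println(Timestamp.valueOf("1970-01-01 00:00:00.001").getTime)   // -3599999
        } finally {
          TimeZone.setDefault(original)  // restore the zone, as afterAll() does in the suite
        }
      }
    }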
From 442b59db8843d632ac975ea15b95daca86e58c53 Mon Sep 17 00:00:00 2001
From: Zdenek Farana
Date: Fri, 29 Aug 2014 10:59:33 +0200
Subject: [PATCH 7/7] Fixed test merge conflict

---
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 7c93318fe63da..93ebac50de11d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -76,9 +76,9 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
       Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001"))))

     checkAnswer(sql(
-      """SELECT time FROM timestamps WHERE time<'1970-01-01 01:00:00.003'
+      """SELECT time FROM timestamps WHERE time<'1970-01-01 00:00:00.003'
         AND time>'1970-01-01 00:00:00.001'"""),
-      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 01:00:00.002"))))
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.002"))))

     checkAnswer(sql(
       "SELECT time FROM timestamps WHERE time IN ('1970-01-01 00:00:00.001','1970-01-01 00:00:00.002')"),