From 2fc93649b2e7294d79ab7c7c5c591b77d46d9c54 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 22 Feb 2016 14:58:05 -0800 Subject: [PATCH 1/2] Comprehensive SQL generation support for expressions --- .../catalyst/analysis/FunctionRegistry.scala | 44 +-- .../catalyst/expressions/InputFileName.scala | 4 +- .../aggregate/HyperLogLogPlusPlus.scala | 2 + .../sql/catalyst/expressions/arithmetic.scala | 2 - .../spark/sql/catalyst/expressions/misc.scala | 2 - .../org/apache/spark/sql/functions.scala | 2 +- .../spark/sql/hive/ExpressionToSQLSuite.scala | 271 ++++++++++++++++++ .../sql/hive/LogicalPlanToSQLSuite.scala | 8 + 8 files changed, 305 insertions(+), 30 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 1be97c7b81197..14e0612835611 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -125,13 +125,12 @@ object FunctionRegistry { expression[IsNull]("isnull"), expression[IsNotNull]("isnotnull"), expression[Least]("least"), + expression[CreateNamedStruct]("named_struct"), + expression[NaNvl]("nanvl"), expression[Coalesce]("nvl"), expression[Rand]("rand"), expression[Randn]("randn"), expression[CreateStruct]("struct"), - expression[CreateNamedStruct]("named_struct"), - expression[Sqrt]("sqrt"), - expression[NaNvl]("nanvl"), // math functions expression[Acos]("acos"), @@ -145,24 +144,26 @@ object FunctionRegistry { expression[Cos]("cos"), expression[Cosh]("cosh"), expression[Conv]("conv"), + expression[ToDegrees]("degrees"), expression[EulerNumber]("e"), expression[Exp]("exp"), expression[Expm1]("expm1"), expression[Floor]("floor"), expression[Factorial]("factorial"), - expression[Hypot]("hypot"), expression[Hex]("hex"), + expression[Hypot]("hypot"), expression[Logarithm]("log"), - expression[Log]("ln"), expression[Log10]("log10"), expression[Log1p]("log1p"), expression[Log2]("log2"), + expression[Log]("ln"), expression[UnaryMinus]("negative"), expression[Pi]("pi"), - expression[Pow]("pow"), - expression[Pow]("power"), expression[Pmod]("pmod"), expression[UnaryPositive]("positive"), + expression[Pow]("pow"), + expression[Pow]("power"), + expression[ToRadians]("radians"), expression[Rint]("rint"), expression[Round]("round"), expression[ShiftLeft]("shiftleft"), @@ -172,10 +173,9 @@ object FunctionRegistry { expression[Signum]("signum"), expression[Sin]("sin"), expression[Sinh]("sinh"), + expression[Sqrt]("sqrt"), expression[Tan]("tan"), expression[Tanh]("tanh"), - expression[ToDegrees]("degrees"), - expression[ToRadians]("radians"), // aggregate functions expression[HyperLogLogPlusPlus]("approx_count_distinct"), @@ -186,11 +186,13 @@ object FunctionRegistry { expression[CovSample]("covar_samp"), expression[First]("first"), expression[First]("first_value"), + expression[Kurtosis]("kurtosis"), expression[Last]("last"), expression[Last]("last_value"), expression[Max]("max"), expression[Average]("mean"), expression[Min]("min"), + expression[Skewness]("skewness"), expression[StddevSamp]("stddev"), expression[StddevPop]("stddev_pop"), expression[StddevSamp]("stddev_samp"), @@ -198,36 +200,34 @@ object FunctionRegistry { expression[VarianceSamp]("variance"), expression[VariancePop]("var_pop"), expression[VarianceSamp]("var_samp"), - expression[Skewness]("skewness"), - expression[Kurtosis]("kurtosis"), // string functions expression[Ascii]("ascii"), expression[Base64]("base64"), expression[Concat]("concat"), expression[ConcatWs]("concat_ws"), - expression[Encode]("encode"), expression[Decode]("decode"), + expression[Encode]("encode"), expression[FindInSet]("find_in_set"), expression[FormatNumber]("format_number"), + expression[FormatString]("format_string"), expression[GetJsonObject]("get_json_object"), expression[InitCap]("initcap"), - expression[JsonTuple]("json_tuple"), + expression[StringInstr]("instr"), expression[Lower]("lcase"), - expression[Lower]("lower"), expression[Length]("length"), expression[Levenshtein]("levenshtein"), - expression[RegExpExtract]("regexp_extract"), - expression[RegExpReplace]("regexp_replace"), - expression[StringInstr]("instr"), + expression[Lower]("lower"), expression[StringLocate]("locate"), expression[StringLPad]("lpad"), expression[StringTrimLeft]("ltrim"), - expression[FormatString]("format_string"), + expression[JsonTuple]("json_tuple"), expression[FormatString]("printf"), - expression[StringRPad]("rpad"), + expression[RegExpExtract]("regexp_extract"), + expression[RegExpReplace]("regexp_replace"), expression[StringRepeat]("repeat"), expression[StringReverse]("reverse"), + expression[StringRPad]("rpad"), expression[StringTrimRight]("rtrim"), expression[SoundEx]("soundex"), expression[StringSpace]("space"), @@ -237,8 +237,8 @@ object FunctionRegistry { expression[SubstringIndex]("substring_index"), expression[StringTranslate]("translate"), expression[StringTrim]("trim"), - expression[UnBase64]("unbase64"), expression[Upper]("ucase"), + expression[UnBase64]("unbase64"), expression[Unhex]("unhex"), expression[Upper]("upper"), @@ -246,7 +246,6 @@ object FunctionRegistry { expression[AddMonths]("add_months"), expression[CurrentDate]("current_date"), expression[CurrentTimestamp]("current_timestamp"), - expression[CurrentTimestamp]("now"), expression[DateDiff]("datediff"), expression[DateAdd]("date_add"), expression[DateFormatClass]("date_format"), @@ -262,6 +261,7 @@ object FunctionRegistry { expression[Month]("month"), expression[MonthsBetween]("months_between"), expression[NextDay]("next_day"), + expression[CurrentTimestamp]("now"), expression[Quarter]("quarter"), expression[Second]("second"), expression[ToDate]("to_date"), @@ -273,9 +273,9 @@ object FunctionRegistry { expression[Year]("year"), // collection functions + expression[ArrayContains]("array_contains"), expression[Size]("size"), expression[SortArray]("sort_array"), - expression[ArrayContains]("array_contains"), // misc functions expression[Crc32]("crc32"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala index c49c601c3034b..dbd0acf06caa7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala @@ -35,7 +35,7 @@ case class InputFileName() extends LeafExpression with Nondeterministic { override def dataType: DataType = StringType - override val prettyName = "INPUT_FILE_NAME" + override def prettyName: String = "input_file_name" override protected def initInternal(): Unit = {} @@ -48,6 +48,4 @@ case class InputFileName() extends LeafExpression with Nondeterministic { s"final ${ctx.javaType(dataType)} ${ev.value} = " + "org.apache.spark.rdd.SqlNewHadoopRDDState.getInputFileName();" } - - override def sql: String = prettyName } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala index ec833d6789e85..f82d870e3f25d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala @@ -68,6 +68,8 @@ case class HyperLogLogPlusPlus( inputAggBufferOffset = 0) } + override def prettyName: String = "approx_count_distinct" + override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 5af234609ddd4..ed812e06799a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -95,8 +95,6 @@ case class Abs(child: Expression) extends UnaryExpression with ExpectsInputTypes } protected override def nullSafeEval(input: Any): Any = numeric.abs(input) - - override def sql: String = s"$prettyName(${child.sql})" } abstract class BinaryArithmetic extends BinaryOperator { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index dcbb594afd86e..33bd3f20959b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -234,8 +234,6 @@ case class Murmur3Hash(children: Seq[Expression], seed: Int) extends Expression override def prettyName: String = "hash" - override def sql: String = s"$prettyName(${children.map(_.sql).mkString(", ")}, $seed)" - override def eval(input: InternalRow): Any = { var hash = seed var i = 0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 97c6992e18753..7266cd7c6f283 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1972,7 +1972,7 @@ object functions extends LegacyFunctions { def crc32(e: Column): Column = withExpr { Crc32(e.expr) } /** - * Calculates the hash code of given columns, and returns the result as a int column. + * Calculates the hash code of given columns, and returns the result as an int column. * * @group misc_funcs * @since 2.0 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala new file mode 100644 index 0000000000000..07f3c2bd0269a --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import scala.util.control.NonFatal + +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils + +class ExpressionToSQLSuite extends SQLBuilderTest with SQLTestUtils { + import testImplicits._ + + protected override def beforeAll(): Unit = { + sql("DROP TABLE IF EXISTS t0") + sql("DROP TABLE IF EXISTS t1") + sql("DROP TABLE IF EXISTS t2") + + val bytes = Array[Byte](1, 2, 3, 4) + Seq((bytes, "AQIDBA==")).toDF("a", "b").write.saveAsTable("t0") + + sqlContext + .range(10) + .select('id as 'key, concat(lit("val_"), 'id) as 'value) + .write + .saveAsTable("t1") + + sqlContext.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write.saveAsTable("t2") + } + + override protected def afterAll(): Unit = { + sql("DROP TABLE IF EXISTS t0") + sql("DROP TABLE IF EXISTS t1") + sql("DROP TABLE IF EXISTS t2") + } + + private def checkHiveQl(hiveQl: String): Unit = { + val df = sql(hiveQl) + + val convertedSQL = try new SQLBuilder(df).toSQL catch { + case NonFatal(e) => + fail( + s"""Cannot convert the following HiveQL query plan back to SQL query string: + | + |# Original HiveQL query string: + |$hiveQl + | + |# Resolved query plan: + |${df.queryExecution.analyzed.treeString} + """.stripMargin) + } + + try { + checkAnswer(sql(convertedSQL), df) + } catch { case cause: Throwable => + fail( + s"""Failed to execute converted SQL string or got wrong answer: + | + |# Converted SQL query string: + |$convertedSQL + | + |# Original HiveQL query string: + |$hiveQl + | + |# Resolved query plan: + |${df.queryExecution.analyzed.treeString} + """.stripMargin, + cause) + } + } + + test("misc non-aggregate functions") { + checkHiveQl("SELECT abs(15), abs(-15)") + checkHiveQl("SELECT array(1,2,3)") + checkHiveQl("SELECT coalesce(null, 1, 2)") + // wait for resolution of JIRA SPARK-12719 SQL Generation for Generators + // checkHiveQl("SELECT explode(array(1,2,3))") + checkHiveQl("SELECT greatest(1,null,3)") + checkHiveQl("SELECT if(1==2, 'yes', 'no')") + checkHiveQl("SELECT isnan(15), isnan('invalid')") + checkHiveQl("SELECT isnull(null), isnull('a')") + checkHiveQl("SELECT isnotnull(null), isnotnull('a')") + checkHiveQl("SELECT least(1,null,3)") + checkHiveQl("SELECT named_struct('c1',1,'c2',2,'c3',3)") + checkHiveQl("SELECT nanvl(a, 5), nanvl(b, 10), nanvl(d, c) from t2") + checkHiveQl("SELECT nvl(null, 1, 2)") + checkHiveQl("SELECT rand(1)") + checkHiveQl("SELECT randn(3)") + checkHiveQl("SELECT struct(1,2,3)") + } + + test("math functions") { + checkHiveQl("SELECT acos(-1)") + checkHiveQl("SELECT asin(-1)") + checkHiveQl("SELECT atan(1)") + checkHiveQl("SELECT atan2(1, 1)") + checkHiveQl("SELECT bin(10)") + checkHiveQl("SELECT cbrt(1000.0)") + checkHiveQl("SELECT ceil(2.333)") + checkHiveQl("SELECT ceiling(2.333)") + checkHiveQl("SELECT cos(1.0)") + checkHiveQl("SELECT cosh(1.0)") + checkHiveQl("SELECT conv(15, 10, 16)") + checkHiveQl("SELECT degrees(pi())") + checkHiveQl("SELECT e()") + checkHiveQl("SELECT exp(1.0)") + checkHiveQl("SELECT expm1(1.0)") + checkHiveQl("SELECT floor(-2.333)") + checkHiveQl("SELECT factorial(5)") + checkHiveQl("SELECT hex(10)") + checkHiveQl("SELECT hypot(3, 4)") + checkHiveQl("SELECT log(10.0)") + checkHiveQl("SELECT log10(1000.0)") + checkHiveQl("SELECT log1p(0.0)") + checkHiveQl("SELECT log2(8.0)") + checkHiveQl("SELECT ln(10.0)") + checkHiveQl("SELECT negative(-1)") + checkHiveQl("SELECT pi()") + checkHiveQl("SELECT pmod(3, 2)") + checkHiveQl("SELECT positive(3)") + checkHiveQl("SELECT pow(2, 3)") + checkHiveQl("SELECT power(2, 3)") + checkHiveQl("SELECT radians(180.0)") + checkHiveQl("SELECT rint(1.63)") + checkHiveQl("SELECT round(31.415, -1)") + checkHiveQl("SELECT shiftleft(2, 3)") + checkHiveQl("SELECT shiftright(16, 3)") + checkHiveQl("SELECT shiftrightunsigned(16, 3)") + checkHiveQl("SELECT sign(-2.63)") + checkHiveQl("SELECT signum(-2.63)") + checkHiveQl("SELECT sin(1.0)") + checkHiveQl("SELECT sinh(1.0)") + checkHiveQl("SELECT sqrt(100.0)") + checkHiveQl("SELECT tan(1.0)") + checkHiveQl("SELECT tanh(1.0)") + } + + test("aggregate functions") { + checkHiveQl("SELECT approx_count_distinct(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT avg(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT corr(value, key) FROM t1 GROUP BY key") + checkHiveQl("SELECT count(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT covar_pop(value, key) FROM t1 GROUP BY key") + checkHiveQl("SELECT covar_samp(value, key) FROM t1 GROUP BY key") + checkHiveQl("SELECT first(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT first_value(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT kurtosis(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT last(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT last_value(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT max(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT mean(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT min(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT skewness(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT stddev(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT stddev_pop(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT stddev_samp(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT sum(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT variance(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT var_pop(value) FROM t1 GROUP BY key") + checkHiveQl("SELECT var_samp(value) FROM t1 GROUP BY key") + } + + test("string functions") { + checkHiveQl("SELECT ascii('SparkSql')") + checkHiveQl("SELECT base64(a) FROM t0") + checkHiveQl("SELECT concat('This ', 'is ', 'a ', 'test')") + checkHiveQl("SELECT concat_ws(' ', 'This', 'is', 'a', 'test')") + checkHiveQl("SELECT decode(a, 'UTF-8') FROM t0") + checkHiveQl("SELECT encode('SparkSql', 'UTF-8')") + checkHiveQl("SELECT find_in_set('ab', 'abc,b,ab,c,def')") + checkHiveQl("SELECT format_number(1234567.890, 2)") + checkHiveQl("SELECT format_string('aa%d%s',123, 'cc')") + checkHiveQl("SELECT get_json_object('{\"a\":\"bc\"}','$.a')") + checkHiveQl("SELECT initcap('This is a test')") + checkHiveQl("SELECT instr('This is a test', 'is')") + checkHiveQl("SELECT lcase('SparkSql')") + checkHiveQl("SELECT length('This is a test')") + checkHiveQl("SELECT levenshtein('This is a test', 'Another test')") + checkHiveQl("SELECT lower('SparkSql')") + checkHiveQl("SELECT locate('is', 'This is a test', 3)") + checkHiveQl("SELECT lpad('SparkSql', 16, 'Learning')") + checkHiveQl("SELECT ltrim(' SparkSql ')") + // wait for resolution of JIRA SPARK-12719 SQL Generation for Generators + // checkHiveQl("SELECT json_tuple('{\"f1\": \"value1\", \"f2\": \"value2\"}','f1')") + checkHiveQl("SELECT printf('aa%d%s', 123, 'cc')") + checkHiveQl("SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1)") + checkHiveQl("SELECT regexp_replace('100-200', '(\\d+)', 'num')") + checkHiveQl("SELECT repeat('SparkSql', 3)") + checkHiveQl("SELECT reverse('SparkSql')") + checkHiveQl("SELECT rpad('SparkSql', 16, ' is Cool')") + checkHiveQl("SELECT rtrim(' SparkSql ')") + checkHiveQl("SELECT soundex('SparkSql')") + checkHiveQl("SELECT space(2)") + checkHiveQl("SELECT split('aa2bb3cc', '[1-9]+')") + checkHiveQl("SELECT space(2)") + checkHiveQl("SELECT substr('This is a test', 'is')") + checkHiveQl("SELECT substring('This is a test', 'is')") + checkHiveQl("SELECT substring_index('www.apache.org','.',1)") + checkHiveQl("SELECT translate('translate', 'rnlt', '123')") + checkHiveQl("SELECT trim(' SparkSql ')") + checkHiveQl("SELECT ucase('SparkSql')") + checkHiveQl("SELECT unbase64('SparkSql')") + checkHiveQl("SELECT unhex(41)") + checkHiveQl("SELECT upper('SparkSql')") + } + + test("datetime functions") { + checkHiveQl("SELECT add_months('2001-03-31', 1)") + checkHiveQl("SELECT count(current_date())") + checkHiveQl("SELECT count(current_timestamp())") + checkHiveQl("SELECT datediff('2001-01-02', '2001-01-01')") + checkHiveQl("SELECT date_add('2001-01-02', 1)") + checkHiveQl("SELECT date_format('2001-05-02', 'yyyy-dd')") + checkHiveQl("SELECT date_sub('2001-01-02', 1)") + checkHiveQl("SELECT day('2001-05-02')") + checkHiveQl("SELECT dayofyear('2001-05-02')") + checkHiveQl("SELECT dayofmonth('2001-05-02')") + checkHiveQl("SELECT from_unixtime(1000, 'yyyy-MM-dd HH:mm:ss')") + checkHiveQl("SELECT from_utc_timestamp('2015-07-24 00:00:00', 'PST')") + checkHiveQl("SELECT hour('11:35:55')") + checkHiveQl("SELECT last_day('2001-01-01')") + checkHiveQl("SELECT minute('11:35:55')") + checkHiveQl("SELECT month('2001-05-02')") + checkHiveQl("SELECT months_between('2001-10-30 10:30:00', '1996-10-30')") + checkHiveQl("SELECT next_day('2001-05-02', 'TU')") + checkHiveQl("SELECT count(now())") + checkHiveQl("SELECT quarter('2001-05-02')") + checkHiveQl("SELECT second('11:35:55')") + checkHiveQl("SELECT to_date('2001-10-30 10:30:00')") + checkHiveQl("SELECT to_unix_timestamp('2015-07-24 00:00:00', 'yyyy-MM-dd HH:mm:ss')") + checkHiveQl("SELECT to_utc_timestamp('2015-07-24 00:00:00', 'PST')") + checkHiveQl("SELECT trunc('2001-10-30 10:30:00', 'YEAR')") + checkHiveQl("SELECT unix_timestamp('2001-10-30 10:30:00')") + checkHiveQl("SELECT weekofyear('2001-05-02')") + checkHiveQl("SELECT year('2001-05-02')") + + checkHiveQl("SELECT interval 3 years - 3 month 7 week 123 microseconds as i") + } + + test("collection functions") { + checkHiveQl("SELECT array_contains(array(2, 9, 8), 9)") + checkHiveQl("SELECT size(array('b', 'd', 'c', 'a'))") + checkHiveQl("SELECT sort_array(array('b', 'd', 'c', 'a'))") + } + + test("misc functions") { + checkHiveQl("SELECT crc32('Spark')") + checkHiveQl("SELECT md5('Spark')") + checkHiveQl("SELECT hash('Spark')") + checkHiveQl("SELECT sha('Spark')") + checkHiveQl("SELECT sha1('Spark')") + checkHiveQl("SELECT sha2('Spark', 0)") + checkHiveQl("SELECT spark_partition_id()") + checkHiveQl("SELECT input_file_name()") + checkHiveQl("SELECT monotonically_increasing_id()") + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala index 5255b150aa6be..b162adf215d74 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala @@ -86,6 +86,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { checkHiveQl("SELECT id FROM t0 WHERE id IN (1, 2, 3)") } + test("not in") { + checkHiveQl("SELECT id FROM t0 WHERE id NOT IN (1, 2, 3)") + } + + test("not like") { + checkHiveQl("SELECT id FROM t0 WHERE id + 5 NOT LIKE '1%'") + } + test("aggregate function in having clause") { checkHiveQl("SELECT COUNT(value) FROM t1 GROUP BY key HAVING MAX(key) > 0") } From 1620378727920df24bf0950249e67dfc020f9be0 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 22 Feb 2016 19:49:44 -0800 Subject: [PATCH 2/2] address comments. --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../spark/sql/hive/ExpressionToSQLSuite.scala | 326 +++++++++--------- 2 files changed, 164 insertions(+), 163 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 14e0612835611..26bb96eb085ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -113,6 +113,7 @@ object FunctionRegistry { type FunctionBuilder = Seq[Expression] => Expression + // Note: Whenever we add a new entry here, make sure we also update ExpressionToSQLSuite val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( // misc non-aggregate functions expression[Abs]("abs"), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala index 07f3c2bd0269a..d68c602a887f7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ExpressionToSQLSuite.scala @@ -48,7 +48,7 @@ class ExpressionToSQLSuite extends SQLBuilderTest with SQLTestUtils { sql("DROP TABLE IF EXISTS t2") } - private def checkHiveQl(hiveQl: String): Unit = { + private def checkSqlGeneration(hiveQl: String): Unit = { val df = sql(hiveQl) val convertedSQL = try new SQLBuilder(df).toSQL catch { @@ -84,188 +84,188 @@ class ExpressionToSQLSuite extends SQLBuilderTest with SQLTestUtils { } test("misc non-aggregate functions") { - checkHiveQl("SELECT abs(15), abs(-15)") - checkHiveQl("SELECT array(1,2,3)") - checkHiveQl("SELECT coalesce(null, 1, 2)") + checkSqlGeneration("SELECT abs(15), abs(-15)") + checkSqlGeneration("SELECT array(1,2,3)") + checkSqlGeneration("SELECT coalesce(null, 1, 2)") // wait for resolution of JIRA SPARK-12719 SQL Generation for Generators - // checkHiveQl("SELECT explode(array(1,2,3))") - checkHiveQl("SELECT greatest(1,null,3)") - checkHiveQl("SELECT if(1==2, 'yes', 'no')") - checkHiveQl("SELECT isnan(15), isnan('invalid')") - checkHiveQl("SELECT isnull(null), isnull('a')") - checkHiveQl("SELECT isnotnull(null), isnotnull('a')") - checkHiveQl("SELECT least(1,null,3)") - checkHiveQl("SELECT named_struct('c1',1,'c2',2,'c3',3)") - checkHiveQl("SELECT nanvl(a, 5), nanvl(b, 10), nanvl(d, c) from t2") - checkHiveQl("SELECT nvl(null, 1, 2)") - checkHiveQl("SELECT rand(1)") - checkHiveQl("SELECT randn(3)") - checkHiveQl("SELECT struct(1,2,3)") + // checkSqlGeneration("SELECT explode(array(1,2,3))") + checkSqlGeneration("SELECT greatest(1,null,3)") + checkSqlGeneration("SELECT if(1==2, 'yes', 'no')") + checkSqlGeneration("SELECT isnan(15), isnan('invalid')") + checkSqlGeneration("SELECT isnull(null), isnull('a')") + checkSqlGeneration("SELECT isnotnull(null), isnotnull('a')") + checkSqlGeneration("SELECT least(1,null,3)") + checkSqlGeneration("SELECT named_struct('c1',1,'c2',2,'c3',3)") + checkSqlGeneration("SELECT nanvl(a, 5), nanvl(b, 10), nanvl(d, c) from t2") + checkSqlGeneration("SELECT nvl(null, 1, 2)") + checkSqlGeneration("SELECT rand(1)") + checkSqlGeneration("SELECT randn(3)") + checkSqlGeneration("SELECT struct(1,2,3)") } test("math functions") { - checkHiveQl("SELECT acos(-1)") - checkHiveQl("SELECT asin(-1)") - checkHiveQl("SELECT atan(1)") - checkHiveQl("SELECT atan2(1, 1)") - checkHiveQl("SELECT bin(10)") - checkHiveQl("SELECT cbrt(1000.0)") - checkHiveQl("SELECT ceil(2.333)") - checkHiveQl("SELECT ceiling(2.333)") - checkHiveQl("SELECT cos(1.0)") - checkHiveQl("SELECT cosh(1.0)") - checkHiveQl("SELECT conv(15, 10, 16)") - checkHiveQl("SELECT degrees(pi())") - checkHiveQl("SELECT e()") - checkHiveQl("SELECT exp(1.0)") - checkHiveQl("SELECT expm1(1.0)") - checkHiveQl("SELECT floor(-2.333)") - checkHiveQl("SELECT factorial(5)") - checkHiveQl("SELECT hex(10)") - checkHiveQl("SELECT hypot(3, 4)") - checkHiveQl("SELECT log(10.0)") - checkHiveQl("SELECT log10(1000.0)") - checkHiveQl("SELECT log1p(0.0)") - checkHiveQl("SELECT log2(8.0)") - checkHiveQl("SELECT ln(10.0)") - checkHiveQl("SELECT negative(-1)") - checkHiveQl("SELECT pi()") - checkHiveQl("SELECT pmod(3, 2)") - checkHiveQl("SELECT positive(3)") - checkHiveQl("SELECT pow(2, 3)") - checkHiveQl("SELECT power(2, 3)") - checkHiveQl("SELECT radians(180.0)") - checkHiveQl("SELECT rint(1.63)") - checkHiveQl("SELECT round(31.415, -1)") - checkHiveQl("SELECT shiftleft(2, 3)") - checkHiveQl("SELECT shiftright(16, 3)") - checkHiveQl("SELECT shiftrightunsigned(16, 3)") - checkHiveQl("SELECT sign(-2.63)") - checkHiveQl("SELECT signum(-2.63)") - checkHiveQl("SELECT sin(1.0)") - checkHiveQl("SELECT sinh(1.0)") - checkHiveQl("SELECT sqrt(100.0)") - checkHiveQl("SELECT tan(1.0)") - checkHiveQl("SELECT tanh(1.0)") + checkSqlGeneration("SELECT acos(-1)") + checkSqlGeneration("SELECT asin(-1)") + checkSqlGeneration("SELECT atan(1)") + checkSqlGeneration("SELECT atan2(1, 1)") + checkSqlGeneration("SELECT bin(10)") + checkSqlGeneration("SELECT cbrt(1000.0)") + checkSqlGeneration("SELECT ceil(2.333)") + checkSqlGeneration("SELECT ceiling(2.333)") + checkSqlGeneration("SELECT cos(1.0)") + checkSqlGeneration("SELECT cosh(1.0)") + checkSqlGeneration("SELECT conv(15, 10, 16)") + checkSqlGeneration("SELECT degrees(pi())") + checkSqlGeneration("SELECT e()") + checkSqlGeneration("SELECT exp(1.0)") + checkSqlGeneration("SELECT expm1(1.0)") + checkSqlGeneration("SELECT floor(-2.333)") + checkSqlGeneration("SELECT factorial(5)") + checkSqlGeneration("SELECT hex(10)") + checkSqlGeneration("SELECT hypot(3, 4)") + checkSqlGeneration("SELECT log(10.0)") + checkSqlGeneration("SELECT log10(1000.0)") + checkSqlGeneration("SELECT log1p(0.0)") + checkSqlGeneration("SELECT log2(8.0)") + checkSqlGeneration("SELECT ln(10.0)") + checkSqlGeneration("SELECT negative(-1)") + checkSqlGeneration("SELECT pi()") + checkSqlGeneration("SELECT pmod(3, 2)") + checkSqlGeneration("SELECT positive(3)") + checkSqlGeneration("SELECT pow(2, 3)") + checkSqlGeneration("SELECT power(2, 3)") + checkSqlGeneration("SELECT radians(180.0)") + checkSqlGeneration("SELECT rint(1.63)") + checkSqlGeneration("SELECT round(31.415, -1)") + checkSqlGeneration("SELECT shiftleft(2, 3)") + checkSqlGeneration("SELECT shiftright(16, 3)") + checkSqlGeneration("SELECT shiftrightunsigned(16, 3)") + checkSqlGeneration("SELECT sign(-2.63)") + checkSqlGeneration("SELECT signum(-2.63)") + checkSqlGeneration("SELECT sin(1.0)") + checkSqlGeneration("SELECT sinh(1.0)") + checkSqlGeneration("SELECT sqrt(100.0)") + checkSqlGeneration("SELECT tan(1.0)") + checkSqlGeneration("SELECT tanh(1.0)") } test("aggregate functions") { - checkHiveQl("SELECT approx_count_distinct(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT avg(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT corr(value, key) FROM t1 GROUP BY key") - checkHiveQl("SELECT count(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT covar_pop(value, key) FROM t1 GROUP BY key") - checkHiveQl("SELECT covar_samp(value, key) FROM t1 GROUP BY key") - checkHiveQl("SELECT first(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT first_value(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT kurtosis(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT last(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT last_value(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT max(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT mean(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT min(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT skewness(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT stddev(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT stddev_pop(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT stddev_samp(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT sum(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT variance(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT var_pop(value) FROM t1 GROUP BY key") - checkHiveQl("SELECT var_samp(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT approx_count_distinct(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT avg(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT corr(value, key) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT count(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT covar_pop(value, key) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT covar_samp(value, key) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT first(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT first_value(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT kurtosis(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT last(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT last_value(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT max(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT mean(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT min(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT skewness(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT stddev(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT stddev_pop(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT stddev_samp(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT sum(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT variance(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT var_pop(value) FROM t1 GROUP BY key") + checkSqlGeneration("SELECT var_samp(value) FROM t1 GROUP BY key") } test("string functions") { - checkHiveQl("SELECT ascii('SparkSql')") - checkHiveQl("SELECT base64(a) FROM t0") - checkHiveQl("SELECT concat('This ', 'is ', 'a ', 'test')") - checkHiveQl("SELECT concat_ws(' ', 'This', 'is', 'a', 'test')") - checkHiveQl("SELECT decode(a, 'UTF-8') FROM t0") - checkHiveQl("SELECT encode('SparkSql', 'UTF-8')") - checkHiveQl("SELECT find_in_set('ab', 'abc,b,ab,c,def')") - checkHiveQl("SELECT format_number(1234567.890, 2)") - checkHiveQl("SELECT format_string('aa%d%s',123, 'cc')") - checkHiveQl("SELECT get_json_object('{\"a\":\"bc\"}','$.a')") - checkHiveQl("SELECT initcap('This is a test')") - checkHiveQl("SELECT instr('This is a test', 'is')") - checkHiveQl("SELECT lcase('SparkSql')") - checkHiveQl("SELECT length('This is a test')") - checkHiveQl("SELECT levenshtein('This is a test', 'Another test')") - checkHiveQl("SELECT lower('SparkSql')") - checkHiveQl("SELECT locate('is', 'This is a test', 3)") - checkHiveQl("SELECT lpad('SparkSql', 16, 'Learning')") - checkHiveQl("SELECT ltrim(' SparkSql ')") + checkSqlGeneration("SELECT ascii('SparkSql')") + checkSqlGeneration("SELECT base64(a) FROM t0") + checkSqlGeneration("SELECT concat('This ', 'is ', 'a ', 'test')") + checkSqlGeneration("SELECT concat_ws(' ', 'This', 'is', 'a', 'test')") + checkSqlGeneration("SELECT decode(a, 'UTF-8') FROM t0") + checkSqlGeneration("SELECT encode('SparkSql', 'UTF-8')") + checkSqlGeneration("SELECT find_in_set('ab', 'abc,b,ab,c,def')") + checkSqlGeneration("SELECT format_number(1234567.890, 2)") + checkSqlGeneration("SELECT format_string('aa%d%s',123, 'cc')") + checkSqlGeneration("SELECT get_json_object('{\"a\":\"bc\"}','$.a')") + checkSqlGeneration("SELECT initcap('This is a test')") + checkSqlGeneration("SELECT instr('This is a test', 'is')") + checkSqlGeneration("SELECT lcase('SparkSql')") + checkSqlGeneration("SELECT length('This is a test')") + checkSqlGeneration("SELECT levenshtein('This is a test', 'Another test')") + checkSqlGeneration("SELECT lower('SparkSql')") + checkSqlGeneration("SELECT locate('is', 'This is a test', 3)") + checkSqlGeneration("SELECT lpad('SparkSql', 16, 'Learning')") + checkSqlGeneration("SELECT ltrim(' SparkSql ')") // wait for resolution of JIRA SPARK-12719 SQL Generation for Generators - // checkHiveQl("SELECT json_tuple('{\"f1\": \"value1\", \"f2\": \"value2\"}','f1')") - checkHiveQl("SELECT printf('aa%d%s', 123, 'cc')") - checkHiveQl("SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1)") - checkHiveQl("SELECT regexp_replace('100-200', '(\\d+)', 'num')") - checkHiveQl("SELECT repeat('SparkSql', 3)") - checkHiveQl("SELECT reverse('SparkSql')") - checkHiveQl("SELECT rpad('SparkSql', 16, ' is Cool')") - checkHiveQl("SELECT rtrim(' SparkSql ')") - checkHiveQl("SELECT soundex('SparkSql')") - checkHiveQl("SELECT space(2)") - checkHiveQl("SELECT split('aa2bb3cc', '[1-9]+')") - checkHiveQl("SELECT space(2)") - checkHiveQl("SELECT substr('This is a test', 'is')") - checkHiveQl("SELECT substring('This is a test', 'is')") - checkHiveQl("SELECT substring_index('www.apache.org','.',1)") - checkHiveQl("SELECT translate('translate', 'rnlt', '123')") - checkHiveQl("SELECT trim(' SparkSql ')") - checkHiveQl("SELECT ucase('SparkSql')") - checkHiveQl("SELECT unbase64('SparkSql')") - checkHiveQl("SELECT unhex(41)") - checkHiveQl("SELECT upper('SparkSql')") + // checkSqlGeneration("SELECT json_tuple('{\"f1\": \"value1\", \"f2\": \"value2\"}','f1')") + checkSqlGeneration("SELECT printf('aa%d%s', 123, 'cc')") + checkSqlGeneration("SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1)") + checkSqlGeneration("SELECT regexp_replace('100-200', '(\\d+)', 'num')") + checkSqlGeneration("SELECT repeat('SparkSql', 3)") + checkSqlGeneration("SELECT reverse('SparkSql')") + checkSqlGeneration("SELECT rpad('SparkSql', 16, ' is Cool')") + checkSqlGeneration("SELECT rtrim(' SparkSql ')") + checkSqlGeneration("SELECT soundex('SparkSql')") + checkSqlGeneration("SELECT space(2)") + checkSqlGeneration("SELECT split('aa2bb3cc', '[1-9]+')") + checkSqlGeneration("SELECT space(2)") + checkSqlGeneration("SELECT substr('This is a test', 'is')") + checkSqlGeneration("SELECT substring('This is a test', 'is')") + checkSqlGeneration("SELECT substring_index('www.apache.org','.',1)") + checkSqlGeneration("SELECT translate('translate', 'rnlt', '123')") + checkSqlGeneration("SELECT trim(' SparkSql ')") + checkSqlGeneration("SELECT ucase('SparkSql')") + checkSqlGeneration("SELECT unbase64('SparkSql')") + checkSqlGeneration("SELECT unhex(41)") + checkSqlGeneration("SELECT upper('SparkSql')") } test("datetime functions") { - checkHiveQl("SELECT add_months('2001-03-31', 1)") - checkHiveQl("SELECT count(current_date())") - checkHiveQl("SELECT count(current_timestamp())") - checkHiveQl("SELECT datediff('2001-01-02', '2001-01-01')") - checkHiveQl("SELECT date_add('2001-01-02', 1)") - checkHiveQl("SELECT date_format('2001-05-02', 'yyyy-dd')") - checkHiveQl("SELECT date_sub('2001-01-02', 1)") - checkHiveQl("SELECT day('2001-05-02')") - checkHiveQl("SELECT dayofyear('2001-05-02')") - checkHiveQl("SELECT dayofmonth('2001-05-02')") - checkHiveQl("SELECT from_unixtime(1000, 'yyyy-MM-dd HH:mm:ss')") - checkHiveQl("SELECT from_utc_timestamp('2015-07-24 00:00:00', 'PST')") - checkHiveQl("SELECT hour('11:35:55')") - checkHiveQl("SELECT last_day('2001-01-01')") - checkHiveQl("SELECT minute('11:35:55')") - checkHiveQl("SELECT month('2001-05-02')") - checkHiveQl("SELECT months_between('2001-10-30 10:30:00', '1996-10-30')") - checkHiveQl("SELECT next_day('2001-05-02', 'TU')") - checkHiveQl("SELECT count(now())") - checkHiveQl("SELECT quarter('2001-05-02')") - checkHiveQl("SELECT second('11:35:55')") - checkHiveQl("SELECT to_date('2001-10-30 10:30:00')") - checkHiveQl("SELECT to_unix_timestamp('2015-07-24 00:00:00', 'yyyy-MM-dd HH:mm:ss')") - checkHiveQl("SELECT to_utc_timestamp('2015-07-24 00:00:00', 'PST')") - checkHiveQl("SELECT trunc('2001-10-30 10:30:00', 'YEAR')") - checkHiveQl("SELECT unix_timestamp('2001-10-30 10:30:00')") - checkHiveQl("SELECT weekofyear('2001-05-02')") - checkHiveQl("SELECT year('2001-05-02')") + checkSqlGeneration("SELECT add_months('2001-03-31', 1)") + checkSqlGeneration("SELECT count(current_date())") + checkSqlGeneration("SELECT count(current_timestamp())") + checkSqlGeneration("SELECT datediff('2001-01-02', '2001-01-01')") + checkSqlGeneration("SELECT date_add('2001-01-02', 1)") + checkSqlGeneration("SELECT date_format('2001-05-02', 'yyyy-dd')") + checkSqlGeneration("SELECT date_sub('2001-01-02', 1)") + checkSqlGeneration("SELECT day('2001-05-02')") + checkSqlGeneration("SELECT dayofyear('2001-05-02')") + checkSqlGeneration("SELECT dayofmonth('2001-05-02')") + checkSqlGeneration("SELECT from_unixtime(1000, 'yyyy-MM-dd HH:mm:ss')") + checkSqlGeneration("SELECT from_utc_timestamp('2015-07-24 00:00:00', 'PST')") + checkSqlGeneration("SELECT hour('11:35:55')") + checkSqlGeneration("SELECT last_day('2001-01-01')") + checkSqlGeneration("SELECT minute('11:35:55')") + checkSqlGeneration("SELECT month('2001-05-02')") + checkSqlGeneration("SELECT months_between('2001-10-30 10:30:00', '1996-10-30')") + checkSqlGeneration("SELECT next_day('2001-05-02', 'TU')") + checkSqlGeneration("SELECT count(now())") + checkSqlGeneration("SELECT quarter('2001-05-02')") + checkSqlGeneration("SELECT second('11:35:55')") + checkSqlGeneration("SELECT to_date('2001-10-30 10:30:00')") + checkSqlGeneration("SELECT to_unix_timestamp('2015-07-24 00:00:00', 'yyyy-MM-dd HH:mm:ss')") + checkSqlGeneration("SELECT to_utc_timestamp('2015-07-24 00:00:00', 'PST')") + checkSqlGeneration("SELECT trunc('2001-10-30 10:30:00', 'YEAR')") + checkSqlGeneration("SELECT unix_timestamp('2001-10-30 10:30:00')") + checkSqlGeneration("SELECT weekofyear('2001-05-02')") + checkSqlGeneration("SELECT year('2001-05-02')") - checkHiveQl("SELECT interval 3 years - 3 month 7 week 123 microseconds as i") + checkSqlGeneration("SELECT interval 3 years - 3 month 7 week 123 microseconds as i") } test("collection functions") { - checkHiveQl("SELECT array_contains(array(2, 9, 8), 9)") - checkHiveQl("SELECT size(array('b', 'd', 'c', 'a'))") - checkHiveQl("SELECT sort_array(array('b', 'd', 'c', 'a'))") + checkSqlGeneration("SELECT array_contains(array(2, 9, 8), 9)") + checkSqlGeneration("SELECT size(array('b', 'd', 'c', 'a'))") + checkSqlGeneration("SELECT sort_array(array('b', 'd', 'c', 'a'))") } test("misc functions") { - checkHiveQl("SELECT crc32('Spark')") - checkHiveQl("SELECT md5('Spark')") - checkHiveQl("SELECT hash('Spark')") - checkHiveQl("SELECT sha('Spark')") - checkHiveQl("SELECT sha1('Spark')") - checkHiveQl("SELECT sha2('Spark', 0)") - checkHiveQl("SELECT spark_partition_id()") - checkHiveQl("SELECT input_file_name()") - checkHiveQl("SELECT monotonically_increasing_id()") + checkSqlGeneration("SELECT crc32('Spark')") + checkSqlGeneration("SELECT md5('Spark')") + checkSqlGeneration("SELECT hash('Spark')") + checkSqlGeneration("SELECT sha('Spark')") + checkSqlGeneration("SELECT sha1('Spark')") + checkSqlGeneration("SELECT sha2('Spark', 0)") + checkSqlGeneration("SELECT spark_partition_id()") + checkSqlGeneration("SELECT input_file_name()") + checkSqlGeneration("SELECT monotonically_increasing_id()") } }