From fff17d5a28acda7f60f87b8dcf133a5252159516 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 7 Nov 2019 20:56:38 +0800 Subject: [PATCH] add a sep conf --- .../spark/sql/catalyst/expressions/Cast.scala | 6 +- .../sql/catalyst/expressions/literals.scala | 2 - .../apache/spark/sql/internal/SQLConf.scala | 16 ++- .../spark/sql/execution/HiveResult.scala | 8 +- .../resources/sql-tests/inputs/datetime.sql | 11 -- .../resources/sql-tests/inputs/interval.sql | 40 ++++++ .../sql-tests/results/datetime.sql.out | 58 +-------- .../sql-tests/results/interval.sql.out | 114 ++++++++++++++++++ 8 files changed, 179 insertions(+), 76 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/interval.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/interval.sql.out diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index ae9203de9590e..9e54e08b74128 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.IntervalStyle import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -281,8 +282,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // UDFToString private[this] def castToString(from: DataType): Any => Any = from match { - case CalendarIntervalType if ansiEnabled => buildCast[CalendarInterval](_, - i => UTF8String.fromString(IntervalUtils.toSqlStandardString(i))) + case CalendarIntervalType if 
SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD => + buildCast[CalendarInterval](_, i => + UTF8String.fromString(IntervalUtils.toSqlStandardString(i))) case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes) case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d))) case TimestampType => buildCast[Long](_, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 960035af640c9..5a5d7a17acd99 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -409,8 +409,6 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)) s"TIMESTAMP('${formatter.format(v)}')" case (v: Array[Byte], BinaryType) => s"X'${DatatypeConverter.printHexBinary(v)}'" - case (v: CalendarInterval, CalendarIntervalType) if SQLConf.get.ansiEnabled => - IntervalUtils.toSqlStandardString(v) case _ => value.toString } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a228d9f064a1e..d6293328fd4f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -37,7 +37,6 @@ import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME -import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.unsafe.array.ByteArrayMethods import 
org.apache.spark.util.Utils @@ -1774,6 +1773,19 @@ object SQLConf { .booleanConf .createWithDefault(false) + object IntervalStyle extends Enumeration { + val SQL_STANDARD, MULTI_UNITS = Value + } + + val INTERVAL_STYLE = buildConf("spark.sql.IntervalOutputStyle") + .doc("Display format for interval values. The value SQL_STANDARD will produce output" + + " matching SQL standard interval literals. The value MULTI_UNITS (which is the default)" + + " will produce output in form of value unit pairs, i.e. '3 year 2 months 10 days'") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(IntervalStyle.values.map(_.toString)) + .createWithDefault(IntervalStyle.MULTI_UNITS.toString) + val SORT_BEFORE_REPARTITION = buildConf("spark.sql.execution.sortBeforeRepartition") .internal() @@ -2502,6 +2514,8 @@ class SQLConf extends Serializable with Logging { def storeAssignmentPolicy: StoreAssignmentPolicy.Value = StoreAssignmentPolicy.withName(getConf(STORE_ASSIGNMENT_POLICY)) + def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE)) + def ansiEnabled: Boolean = getConf(ANSI_ENABLED) def usePostgreSQLDialect: Boolean = getConf(DIALECT) == Dialect.POSTGRESQL.toString() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index d0bdb2fa552e1..66f673b4af12c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.IntervalStyle import org.apache.spark.sql.types._ import 
org.apache.spark.unsafe.types.CalendarInterval @@ -81,7 +82,6 @@ object HiveResult { private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) private lazy val dateFormatter = DateFormatter(zoneId) private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) - private lazy val ansiEnabled = SQLConf.get.ansiEnabled /** Hive outputs fields of structs slightly differently than top level attributes. */ private def toHiveStructString(a: (Any, DataType)): String = a match { @@ -99,7 +99,8 @@ object HiveResult { case (null, _) => "null" case (s: String, StringType) => "\"" + s + "\"" case (decimal, DecimalType()) => decimal.toString - case (interval: CalendarInterval, CalendarIntervalType) if ansiEnabled => + case (interval: CalendarInterval, CalendarIntervalType) + if SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD => IntervalUtils.toSqlStandardString(interval) case (interval, CalendarIntervalType) => interval.toString case (other, tpe) if primitiveTypes contains tpe => other.toString @@ -124,7 +125,8 @@ object HiveResult { DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) - case (interval: CalendarInterval, CalendarIntervalType) if ansiEnabled => + case (interval: CalendarInterval, CalendarIntervalType) + if SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD => IntervalUtils.toSqlStandardString(interval) case (interval, CalendarIntervalType) => interval.toString case (other, _ : UserDefinedType[_]) => other.toString diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 09eb185bd4c02..0e22af1fbdf29 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -36,14 +36,3 @@ select date '2001-10-01' - 7; select date '2001-10-01' - date '2001-09-28'; select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678'; select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'; - --- interval operations -select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15'); -select interval 4 month 2 weeks 3 microseconds * 1.5; -select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5; - --- interval operation with null and zero case -select interval '2 seconds' / 0; -select interval '2 seconds' / null; -select interval '2 seconds' * null; -select null * interval '2 seconds'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql new file mode 100644 index 0000000000000..445428ee88440 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -0,0 +1,40 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +-- test for interacting with intervals + +-- using SQL_STANDARD output style +set spark.sql.IntervalOutputStyle=SQL_STANDARD; +select interval 4 month 2 weeks 3 microseconds; +select interval '1 year 20 month'; +select interval '-1 year -20 month'; +select interval '20 month 30 days -21 hours 10 minutes 999 milliseconds'; +select date'2019-10-15' - timestamp'2019-10-15 10:11:12.001002'; + +-- using MULTI_UNITS (which is the default) output style +set spark.sql.IntervalOutputStyle=MULTI_UNITS; + +-- interval operations +select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15'); +select interval 4 month 2 weeks 3 microseconds * 1.5; +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5; + +-- interval operation with null and zero case +select interval '2 seconds' / 0; +select interval '2 seconds' / null; +select interval '2 seconds' * null; +select null * interval '2 seconds'; diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index b5ea7d66fd205..a40f5acb6d538 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 17 -- !query 0 @@ -145,59 +145,3 @@ select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01' struct -- !query 16 output -2078 hours -48 minutes -47.654322 seconds - - --- !query 17 -select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15') --- !query 17 schema -struct --- !query 17 output -30 hours 33 minutes 36.003006 seconds - - --- !query 18 -select interval 4 month 2 weeks 3 microseconds * 1.5 --- !query 18 schema -struct --- !query 18 output -6 months 21 days 0.000005 seconds - - --- !query 19 -select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5 --- !query 19 schema -struct --- !query 19 output -16 hours - - --- 
!query 20 -select interval '2 seconds' / 0 --- !query 20 schema -struct --- !query 20 output -NULL - - --- !query 21 -select interval '2 seconds' / null --- !query 21 schema -struct --- !query 21 output -NULL - - --- !query 22 -select interval '2 seconds' * null --- !query 22 schema -struct --- !query 22 output -NULL - - --- !query 23 -select null * interval '2 seconds' --- !query 23 schema -struct --- !query 23 output -NULL diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out new file mode 100644 index 0000000000000..4d6d6ca4fec85 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -0,0 +1,114 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 14 + + +-- !query 0 +set spark.sql.IntervalOutputStyle=SQL_STANDARD +-- !query 0 schema +struct +-- !query 0 output +spark.sql.IntervalOutputStyle SQL_STANDARD + + +-- !query 1 +select interval 4 month 2 weeks 3 microseconds +-- !query 1 schema +struct<4 months 14 days 0.000003 seconds:interval> +-- !query 1 output +0-4 14 0:00:00.000003 + + +-- !query 2 +select interval '1 year 20 month' +-- !query 2 schema +struct<2 years 8 months:interval> +-- !query 2 output +2-8 + + +-- !query 3 +select interval '-1 year -20 month' +-- !query 3 schema +struct<-2 years -8 months:interval> +-- !query 3 output +-2-8 + + +-- !query 4 +select interval '20 month 30 days -21 hours 10 minutes 999 milliseconds' +-- !query 4 schema +struct<1 years 8 months 30 days -20 hours -49 minutes -59.001 seconds:interval> +-- !query 4 output +1-8 30 -20:49:59.001 + + +-- !query 5 +select date'2019-10-15' - timestamp'2019-10-15 10:11:12.001002' +-- !query 5 schema +struct +-- !query 5 output +-10:11:12.001002 + + +-- !query 6 +set spark.sql.IntervalOutputStyle=MULTI_UNITS +-- !query 6 schema +struct +-- !query 6 output +spark.sql.IntervalOutputStyle MULTI_UNITS + + +-- !query 7 +select 3 * (timestamp'2019-10-15 
10:11:12.001002' - date'2019-10-15') +-- !query 7 schema +struct +-- !query 7 output +30 hours 33 minutes 36.003006 seconds + + +-- !query 8 +select interval 4 month 2 weeks 3 microseconds * 1.5 +-- !query 8 schema +struct +-- !query 8 output +6 months 21 days 0.000005 seconds + + +-- !query 9 +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5 +-- !query 9 schema +struct +-- !query 9 output +16 hours + + +-- !query 10 +select interval '2 seconds' / 0 +-- !query 10 schema +struct +-- !query 10 output +NULL + + +-- !query 11 +select interval '2 seconds' / null +-- !query 11 schema +struct +-- !query 11 output +NULL + + +-- !query 12 +select interval '2 seconds' * null +-- !query 12 schema +struct +-- !query 12 output +NULL + + +-- !query 13 +select null * interval '2 seconds' +-- !query 13 schema +struct +-- !query 13 output +NULL