add a sep conf

yaooqinn committed Nov 7, 2019
1 parent da119c5 commit fff17d5
Showing 8 changed files with 179 additions and 76 deletions.
6 changes: 4 additions & 2 deletions sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.IntervalStyle
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.UTF8StringBuilder
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
@@ -281,8 +282,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit

// UDFToString
private[this] def castToString(from: DataType): Any => Any = from match {
-case CalendarIntervalType if ansiEnabled => buildCast[CalendarInterval](_,
-i => UTF8String.fromString(IntervalUtils.toSqlStandardString(i)))
+case CalendarIntervalType if SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD =>
+buildCast[CalendarInterval](_, i =>
+UTF8String.fromString(IntervalUtils.toSqlStandardString(i)))
case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d)))
case TimestampType => buildCast[Long](_,
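In effect, Cast's SQL-standard interval rendering is decoupled from the ANSI flag (ansiEnabled) and gated on the new style conf instead. A minimal sketch of the selection logic, reusing only names visible in this diff (illustrative, not the commit's literal code):

import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.IntervalStyle
import org.apache.spark.unsafe.types.CalendarInterval

// Sketch: pick the interval formatter from the session conf, not ansiEnabled.
val formatInterval: CalendarInterval => String =
  if (SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD) {
    IntervalUtils.toSqlStandardString // e.g. "0-4 14 0:00:00.000003"
  } else {
    _.toString // MULTI_UNITS, e.g. "4 months 14 days 0.000003 seconds"
  }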
2 changes: 0 additions & 2 deletions sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -409,8 +409,6 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression {
DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
s"TIMESTAMP('${formatter.format(v)}')"
case (v: Array[Byte], BinaryType) => s"X'${DatatypeConverter.printHexBinary(v)}'"
-case (v: CalendarInterval, CalendarIntervalType) if SQLConf.get.ansiEnabled =>
-IntervalUtils.toSqlStandardString(v)
case _ => value.toString
}
}
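With that case removed, an interval literal's generated SQL text no longer depends on any conf; it falls through to value.toString in multi-units form. A hypothetical one-liner illustrating the post-change behavior (the expected string is inferred from the test output below, e.g. struct<2 years 8 months:interval>):

import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.unsafe.types.CalendarInterval

// 32 months, 0 days, 0 microseconds; toString folds whole years out of months.
Literal(new CalendarInterval(32, 0, 0)).sql // expected: "2 years 8 months"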
16 changes: 15 additions & 1 deletion sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -37,7 +37,6 @@ import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler
import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
-import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy
import org.apache.spark.unsafe.array.ByteArrayMethods
import org.apache.spark.util.Utils

@@ -1774,6 +1773,19 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

+object IntervalStyle extends Enumeration {
+val SQL_STANDARD, MULTI_UNITS = Value
+}
+
+val INTERVAL_STYLE = buildConf("spark.sql.IntervalOutputStyle")
+.doc("Display format for interval values. The value SQL_STANDARD will produce output" +
+" matching SQL standard interval literals. The value MULTI_UNITS (which is the default)" +
+" will produce output in the form of value-unit pairs, e.g. '3 years 2 months 10 days'.")
+.stringConf
+.transform(_.toUpperCase(Locale.ROOT))
+.checkValues(IntervalStyle.values.map(_.toString))
+.createWithDefault(IntervalStyle.MULTI_UNITS.toString)
+
val SORT_BEFORE_REPARTITION =
buildConf("spark.sql.execution.sortBeforeRepartition")
.internal()
@@ -2502,6 +2514,8 @@ class SQLConf extends Serializable with Logging {
def storeAssignmentPolicy: StoreAssignmentPolicy.Value =
StoreAssignmentPolicy.withName(getConf(STORE_ASSIGNMENT_POLICY))

+def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE))
+
def ansiEnabled: Boolean = getConf(ANSI_ENABLED)

def usePostgreSQLDialect: Boolean = getConf(DIALECT) == Dialect.POSTGRESQL.toString()
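A short sketch of how the new setting round-trips (assuming SQLConf.setConfString behaves as elsewhere in Spark; the transform upper-cases input before checkValues validates it, and the typed accessor parses it back into the enum):

import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.IntervalStyle

val conf = SQLConf.get
// Lower-case input is accepted because transform(_.toUpperCase) runs first.
conf.setConfString("spark.sql.IntervalOutputStyle", "sql_standard")
assert(conf.intervalOutputStyle == IntervalStyle.SQL_STANDARD)
// An unknown value such as "PG_VERBOSE" would be rejected by checkValues.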
8 changes: 5 additions & 3 deletions sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalUtils, TimestampFormatter}
import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand}
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.IntervalStyle
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

@@ -81,7 +82,6 @@ object HiveResult {
private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)
private lazy val dateFormatter = DateFormatter(zoneId)
private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
-private lazy val ansiEnabled = SQLConf.get.ansiEnabled

/** Hive outputs fields of structs slightly differently than top level attributes. */
private def toHiveStructString(a: (Any, DataType)): String = a match {
@@ -99,7 +99,8 @@
case (null, _) => "null"
case (s: String, StringType) => "\"" + s + "\""
case (decimal, DecimalType()) => decimal.toString
-case (interval: CalendarInterval, CalendarIntervalType) if ansiEnabled =>
+case (interval: CalendarInterval, CalendarIntervalType)
+if SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD =>
IntervalUtils.toSqlStandardString(interval)
case (interval, CalendarIntervalType) => interval.toString
case (other, tpe) if primitiveTypes contains tpe => other.toString
@@ -124,7 +125,8 @@
DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t))
case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8)
case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal)
-case (interval: CalendarInterval, CalendarIntervalType) if ansiEnabled =>
+case (interval: CalendarInterval, CalendarIntervalType)
+if SQLConf.get.intervalOutputStyle == IntervalStyle.SQL_STANDARD =>
IntervalUtils.toSqlStandardString(interval)
case (interval, CalendarIntervalType) => interval.toString
case (other, _ : UserDefinedType[_]) => other.toString
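Dropping the cached private lazy val ansiEnabled also changes when the decision is made: the style conf is now read on every call instead of once at object initialization, so a session can flip formats between statements. A sketch of that effect (illustrative; the rendered strings are taken from interval.sql.out below):

import org.apache.spark.sql.internal.SQLConf

SQLConf.get.setConfString("spark.sql.IntervalOutputStyle", "SQL_STANDARD")
// interval '1 year 20 month' now formats as "2-8"
SQLConf.get.setConfString("spark.sql.IntervalOutputStyle", "MULTI_UNITS")
// the same interval formats as "2 years 8 months" again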
11 changes: 0 additions & 11 deletions sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -36,14 +36,3 @@ select date '2001-10-01' - 7;
select date '2001-10-01' - date '2001-09-28';
select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678';
select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01';
-
--- interval operations
-select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15');
-select interval 4 month 2 weeks 3 microseconds * 1.5;
-select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5;
-
--- interval operation with null and zero case
-select interval '2 seconds' / 0;
-select interval '2 seconds' / null;
-select interval '2 seconds' * null;
-select null * interval '2 seconds';
40 changes: 40 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -0,0 +1,40 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

-- tests for interval output styles and operations

-- using SQL_STANDARD output style
set spark.sql.IntervalOutputStyle=SQL_STANDARD;
select interval 4 month 2 weeks 3 microseconds;
select interval '1 year 20 month';
select interval '-1 year -20 month';
select interval '20 month 30 days -21 hours 10 minutes 999 milliseconds';
select date'2019-10-15' - timestamp'2019-10-15 10:11:12.001002';

-- using MULTI_UNITS (which is default) output style
set spark.sql.IntervalOutputStyle=MULTI_UNITS;

-- interval operations
select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15');
select interval 4 month 2 weeks 3 microseconds * 1.5;
select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5;

-- interval operation with null and zero case
select interval '2 seconds' / 0;
select interval '2 seconds' / null;
select interval '2 seconds' * null;
select null * interval '2 seconds';
58 changes: 1 addition & 57 deletions sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 24
+-- Number of queries: 17


-- !query 0
@@ -145,59 +145,3 @@ select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'
struct<subtracttimestamps(TIMESTAMP('2019-10-06 10:11:12.345678'), CAST(DATE '2020-01-01' AS TIMESTAMP)):interval>
-- !query 16 output
-2078 hours -48 minutes -47.654322 seconds
-
-
--- !query 17
-select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15')
--- !query 17 schema
-struct<multiply_interval(subtracttimestamps(TIMESTAMP('2019-10-15 10:11:12.001002'), CAST(DATE '2019-10-15' AS TIMESTAMP)), CAST(3 AS DOUBLE)):interval>
--- !query 17 output
-30 hours 33 minutes 36.003006 seconds
-
-
--- !query 18
-select interval 4 month 2 weeks 3 microseconds * 1.5
--- !query 18 schema
-struct<multiply_interval(4 months 14 days 0.000003 seconds, CAST(1.5 AS DOUBLE)):interval>
--- !query 18 output
-6 months 21 days 0.000005 seconds
-
-
--- !query 19
-select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5
--- !query 19 schema
-struct<divide_interval(subtracttimestamps(TIMESTAMP('2019-10-15 00:00:00'), TIMESTAMP('2019-10-14 00:00:00')), CAST(1.5 AS DOUBLE)):interval>
--- !query 19 output
-16 hours
-
-
--- !query 20
-select interval '2 seconds' / 0
--- !query 20 schema
-struct<divide_interval(2 seconds, CAST(0 AS DOUBLE)):interval>
--- !query 20 output
-NULL
-
-
--- !query 21
-select interval '2 seconds' / null
--- !query 21 schema
-struct<divide_interval(2 seconds, CAST(NULL AS DOUBLE)):interval>
--- !query 21 output
-NULL
-
-
--- !query 22
-select interval '2 seconds' * null
--- !query 22 schema
-struct<multiply_interval(2 seconds, CAST(NULL AS DOUBLE)):interval>
--- !query 22 output
-NULL
-
-
--- !query 23
-select null * interval '2 seconds'
--- !query 23 schema
-struct<multiply_interval(2 seconds, CAST(NULL AS DOUBLE)):interval>
--- !query 23 output
-NULL
114 changes: 114 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -0,0 +1,114 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 14


-- !query 0
set spark.sql.IntervalOutputStyle=SQL_STANDARD
-- !query 0 schema
struct<key:string,value:string>
-- !query 0 output
spark.sql.IntervalOutputStyle SQL_STANDARD


-- !query 1
select interval 4 month 2 weeks 3 microseconds
-- !query 1 schema
struct<4 months 14 days 0.000003 seconds:interval>
-- !query 1 output
0-4 14 0:00:00.000003


-- !query 2
select interval '1 year 20 month'
-- !query 2 schema
struct<2 years 8 months:interval>
-- !query 2 output
2-8


-- !query 3
select interval '-1 year -20 month'
-- !query 3 schema
struct<-2 years -8 months:interval>
-- !query 3 output
-2-8


-- !query 4
select interval '20 month 30 days -21 hours 10 minutes 999 milliseconds'
-- !query 4 schema
struct<1 years 8 months 30 days -20 hours -49 minutes -59.001 seconds:interval>
-- !query 4 output
1-8 30 -20:49:59.001


-- !query 5
select date'2019-10-15' - timestamp'2019-10-15 10:11:12.001002'
-- !query 5 schema
struct<subtracttimestamps(CAST(DATE '2019-10-15' AS TIMESTAMP), TIMESTAMP('2019-10-15 10:11:12.001002')):interval>
-- !query 5 output
-10:11:12.001002


-- !query 6
set spark.sql.IntervalOutputStyle=MULTI_UNITS
-- !query 6 schema
struct<key:string,value:string>
-- !query 6 output
spark.sql.IntervalOutputStyle MULTI_UNITS


-- !query 7
select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15')
-- !query 7 schema
struct<multiply_interval(subtracttimestamps(TIMESTAMP('2019-10-15 10:11:12.001002'), CAST(DATE '2019-10-15' AS TIMESTAMP)), CAST(3 AS DOUBLE)):interval>
-- !query 7 output
30 hours 33 minutes 36.003006 seconds


-- !query 8
select interval 4 month 2 weeks 3 microseconds * 1.5
-- !query 8 schema
struct<multiply_interval(4 months 14 days 0.000003 seconds, CAST(1.5 AS DOUBLE)):interval>
-- !query 8 output
6 months 21 days 0.000005 seconds


-- !query 9
select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5
-- !query 9 schema
struct<divide_interval(subtracttimestamps(TIMESTAMP('2019-10-15 00:00:00'), TIMESTAMP('2019-10-14 00:00:00')), CAST(1.5 AS DOUBLE)):interval>
-- !query 9 output
16 hours


-- !query 10
select interval '2 seconds' / 0
-- !query 10 schema
struct<divide_interval(2 seconds, CAST(0 AS DOUBLE)):interval>
-- !query 10 output
NULL


-- !query 11
select interval '2 seconds' / null
-- !query 11 schema
struct<divide_interval(2 seconds, CAST(NULL AS DOUBLE)):interval>
-- !query 11 output
NULL


-- !query 12
select interval '2 seconds' * null
-- !query 12 schema
struct<multiply_interval(2 seconds, CAST(NULL AS DOUBLE)):interval>
-- !query 12 output
NULL


-- !query 13
select null * interval '2 seconds'
-- !query 13 schema
struct<multiply_interval(2 seconds, CAST(NULL AS DOUBLE)):interval>
-- !query 13 output
NULL

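A note for reading the SQL_STANDARD outputs above (layout inferred from these expected results, not quoted from a spec): the string is a year-month part, a day part, and a time part separated by spaces, each carrying its own sign, and all-zero leading or trailing parts are omitted. Taking queries 1, 4, and 3:

0-4 14 0:00:00.000003  -> 0 years 4 months, 14 days, 0.000003 seconds
1-8 30 -20:49:59.001   -> 1 year 8 months, 30 days, minus 20:49:59.001
-2-8                   -> minus (2 years 8 months); day and time parts omitted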