[SPARK-30593][SQL] Revert interval ISO/ANSI SQL Standard output since we decided not to follow ANSI and there is no round trip

### What changes were proposed in this pull request?

This reverts #26418 and files a new ticket under https://issues.apache.org/jira/browse/SPARK-30546 to better track interval behavior.

### Why are the changes needed?

Revert the interval ISO/ANSI SQL Standard output since we decided not to follow ANSI and the new formats do not round trip.
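
As a reference point, here is a minimal sketch (not part of the patch itself) of how one interval value renders after this revert, using the expected strings from the CalendarIntervalSuite and IntervalUtilsSuite assertions in the diff below:

```scala
import org.apache.spark.unsafe.types.CalendarInterval

// 3 hours, 13 minutes and 123 extra microseconds, expressed in microseconds
val micros = 3L * 60 * 60 * 1000000 + 13L * 60 * 1000000 + 123
val i = new CalendarInterval(0, 0, micros) // (months, days, microseconds)

// After the revert, casting an interval to string always goes through
// CalendarInterval.toString, i.e. the multi-units format:
assert(i.toString == "3 hours 13 minutes 0.000123 seconds")

// The removed SQL_STANDARD and ISO_8601 styles rendered the same value as
// "+3:13:00.000123" and "PT3H13M0.000123S" respectively (per the deleted
// IntervalUtilsSuite tests); as noted above, those forms did not round trip
// back into Spark interval literals.
```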

### Does this PR introduce any user-facing change?

No, the feature has not been released yet.

### How was this patch tested?

Existing unit tests.

Closes #27304 from yaooqinn/SPARK-30593.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
yaooqinn authored and cloud-fan committed Jan 21, 2020
1 parent 730388b commit af70542
Showing 16 changed files with 44 additions and 309 deletions.
@@ -46,6 +46,36 @@ public void equalsTest() {
assertEquals(i1, i6);
}

@Test
public void toStringTest() {
CalendarInterval i;

i = new CalendarInterval(0, 0, 0);
assertEquals("0 seconds", i.toString());

i = new CalendarInterval(34, 0, 0);
assertEquals("2 years 10 months", i.toString());

i = new CalendarInterval(-34, 0, 0);
assertEquals("-2 years -10 months", i.toString());

i = new CalendarInterval(0, 31, 0);
assertEquals("31 days", i.toString());

i = new CalendarInterval(0, -31, 0);
assertEquals("-31 days", i.toString());

i = new CalendarInterval(0, 0, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123);
assertEquals("3 hours 13 minutes 0.000123 seconds", i.toString());

i = new CalendarInterval(0, 0, -3 * MICROS_PER_HOUR - 13 * MICROS_PER_MINUTE - 123);
assertEquals("-3 hours -13 minutes -0.000123 seconds", i.toString());

i = new CalendarInterval(34, 31, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123);
assertEquals("2 years 10 months 31 days 3 hours 13 minutes 0.000123 seconds",
i.toString());
}

@Test
public void periodAndDurationTest() {
CalendarInterval interval = new CalendarInterval(120, -40, 123456);
@@ -30,9 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.IntervalUtils._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.IntervalStyle._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.UTF8StringBuilder
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
@@ -283,14 +281,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit

// UDFToString
private[this] def castToString(from: DataType): Any => Any = from match {
case CalendarIntervalType => SQLConf.get.intervalOutputStyle match {
case SQL_STANDARD =>
buildCast[CalendarInterval](_, i => UTF8String.fromString(toSqlStandardString(i)))
case ISO_8601 =>
buildCast[CalendarInterval](_, i => UTF8String.fromString(toIso8601String(i)))
case MULTI_UNITS =>
buildCast[CalendarInterval](_, i => UTF8String.fromString(toMultiUnitsString(i)))
}
case CalendarIntervalType =>
buildCast[CalendarInterval](_, i => UTF8String.fromString(i.toString))
case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d)))
case TimestampType => buildCast[Long](_,
@@ -1021,13 +1013,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit
(c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString(
org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString($tf, $c));"""
case CalendarIntervalType =>
val iu = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
val funcName = SQLConf.get.intervalOutputStyle match {
case SQL_STANDARD => "toSqlStandardString"
case ISO_8601 => "toIso8601String"
case MULTI_UNITS => "toMultiUnitsString"
}
(c, evPrim, _) => code"""$evPrim = UTF8String.fromString($iu.$funcName($c));"""
(c, evPrim, _) => code"""$evPrim = UTF8String.fromString($c.toString());"""
case ArrayType(et, _) =>
(c, evPrim, evNull) => {
val buffer = ctx.freshVariable("buffer", classOf[UTF8StringBuilder])
@@ -409,7 +409,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression {
DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
s"TIMESTAMP '${formatter.format(v)}'"
case (i: CalendarInterval, CalendarIntervalType) =>
s"INTERVAL '${IntervalUtils.toMultiUnitsString(i)}'"
s"INTERVAL '${i.toString}'"
case (v: Array[Byte], BinaryType) => s"X'${DatatypeConverter.printHexBinary(v)}'"
case _ => value.toString
}
@@ -497,85 +497,6 @@ object IntervalUtils {
fromDoubles(interval.months / num, interval.days / num, interval.microseconds / num)
}

// `toString` implementation in CalendarInterval is the multi-units format currently.
def toMultiUnitsString(interval: CalendarInterval): String = interval.toString

def toSqlStandardString(interval: CalendarInterval): String = {
val yearMonthPart = if (interval.months < 0) {
val ma = math.abs(interval.months)
"-" + ma / 12 + "-" + ma % 12
} else if (interval.months > 0) {
"+" + interval.months / 12 + "-" + interval.months % 12
} else {
""
}

val dayPart = if (interval.days < 0) {
interval.days.toString
} else if (interval.days > 0) {
"+" + interval.days
} else {
""
}

val timePart = if (interval.microseconds != 0) {
val sign = if (interval.microseconds > 0) "+" else "-"
val sb = new StringBuilder(sign)
var rest = math.abs(interval.microseconds)
sb.append(rest / MICROS_PER_HOUR)
sb.append(':')
rest %= MICROS_PER_HOUR
val minutes = rest / MICROS_PER_MINUTE;
if (minutes < 10) {
sb.append(0)
}
sb.append(minutes)
sb.append(':')
rest %= MICROS_PER_MINUTE
val bd = BigDecimal.valueOf(rest, 6)
if (bd.compareTo(new BigDecimal(10)) < 0) {
sb.append(0)
}
val s = bd.stripTrailingZeros().toPlainString
sb.append(s)
sb.toString()
} else {
""
}

val intervalList = Seq(yearMonthPart, dayPart, timePart).filter(_.nonEmpty)
if (intervalList.nonEmpty) intervalList.mkString(" ") else "0"
}

def toIso8601String(interval: CalendarInterval): String = {
val sb = new StringBuilder("P")

val year = interval.months / 12
if (year != 0) sb.append(year + "Y")
val month = interval.months % 12
if (month != 0) sb.append(month + "M")

if (interval.days != 0) sb.append(interval.days + "D")

if (interval.microseconds != 0) {
sb.append('T')
var rest = interval.microseconds
val hour = rest / MICROS_PER_HOUR
if (hour != 0) sb.append(hour + "H")
rest %= MICROS_PER_HOUR
val minute = rest / MICROS_PER_MINUTE
if (minute != 0) sb.append(minute + "M")
rest %= MICROS_PER_MINUTE
if (rest != 0) {
val bd = BigDecimal.valueOf(rest, 6)
sb.append(bd.stripTrailingZeros().toPlainString + "S")
}
} else if (interval.days == 0 && interval.months == 0) {
sb.append("T0S")
}
sb.toString()
}

private object ParseState extends Enumeration {
type ParseState = Value

@@ -1799,23 +1799,6 @@ object SQLConf {
.checkValues(StoreAssignmentPolicy.values.map(_.toString))
.createWithDefault(StoreAssignmentPolicy.ANSI.toString)

object IntervalStyle extends Enumeration {
type IntervalStyle = Value
val SQL_STANDARD, ISO_8601, MULTI_UNITS = Value
}

val INTERVAL_STYLE = buildConf("spark.sql.intervalOutputStyle")
.doc("When converting interval values to strings (i.e. for display), this config decides the" +
" interval string format. The value SQL_STANDARD will produce output matching SQL standard" +
" interval literals (i.e. '+3-2 +10 -00:00:01'). The value ISO_8601 will produce output" +
" matching the ISO 8601 standard (i.e. 'P3Y2M10DT-1S'). The value MULTI_UNITS (which is the" +
" default) will produce output in form of value unit pairs, (i.e. '3 year 2 months 10 days" +
" -1 seconds'")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(IntervalStyle.values.map(_.toString))
.createWithDefault(IntervalStyle.MULTI_UNITS.toString)

val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled")
.doc("When true, Spark tries to conform to the ANSI SQL specification: 1. Spark will " +
"throw a runtime exception if an overflow occurs in any operation on integral/decimal " +
@@ -2667,8 +2650,6 @@ class SQLConf extends Serializable with Logging {
def storeAssignmentPolicy: StoreAssignmentPolicy.Value =
StoreAssignmentPolicy.withName(getConf(STORE_ASSIGNMENT_POLICY))

def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE))

def ansiEnabled: Boolean = getConf(ANSI_ENABLED)

def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)
@@ -304,70 +304,6 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
}
}

test("to ansi sql standard string") {
val i1 = new CalendarInterval(0, 0, 0)
assert(IntervalUtils.toSqlStandardString(i1) === "0")
val i2 = new CalendarInterval(34, 0, 0)
assert(IntervalUtils.toSqlStandardString(i2) === "+2-10")
val i3 = new CalendarInterval(-34, 0, 0)
assert(IntervalUtils.toSqlStandardString(i3) === "-2-10")
val i4 = new CalendarInterval(0, 31, 0)
assert(IntervalUtils.toSqlStandardString(i4) === "+31")
val i5 = new CalendarInterval(0, -31, 0)
assert(IntervalUtils.toSqlStandardString(i5) === "-31")
val i6 = new CalendarInterval(0, 0, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123)
assert(IntervalUtils.toSqlStandardString(i6) === "+3:13:00.000123")
val i7 = new CalendarInterval(0, 0, -3 * MICROS_PER_HOUR - 13 * MICROS_PER_MINUTE - 123)
assert(IntervalUtils.toSqlStandardString(i7) === "-3:13:00.000123")
val i8 = new CalendarInterval(-34, 31, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123)
assert(IntervalUtils.toSqlStandardString(i8) === "-2-10 +31 +3:13:00.000123")
val i9 = new CalendarInterval(0, 0, -3000 * MICROS_PER_HOUR)
assert(IntervalUtils.toSqlStandardString(i9) === "-3000:00:00")
}

test("to iso 8601 string") {
val i1 = new CalendarInterval(0, 0, 0)
assert(IntervalUtils.toIso8601String(i1) === "PT0S")
val i2 = new CalendarInterval(34, 0, 0)
assert(IntervalUtils.toIso8601String(i2) === "P2Y10M")
val i3 = new CalendarInterval(-34, 0, 0)
assert(IntervalUtils.toIso8601String(i3) === "P-2Y-10M")
val i4 = new CalendarInterval(0, 31, 0)
assert(IntervalUtils.toIso8601String(i4) === "P31D")
val i5 = new CalendarInterval(0, -31, 0)
assert(IntervalUtils.toIso8601String(i5) === "P-31D")
val i6 = new CalendarInterval(0, 0, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123)
assert(IntervalUtils.toIso8601String(i6) === "PT3H13M0.000123S")
val i7 = new CalendarInterval(0, 0, -3 * MICROS_PER_HOUR - 13 * MICROS_PER_MINUTE - 123)
assert(IntervalUtils.toIso8601String(i7) === "PT-3H-13M-0.000123S")
val i8 = new CalendarInterval(-34, 31, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123)
assert(IntervalUtils.toIso8601String(i8) === "P-2Y-10M31DT3H13M0.000123S")
val i9 = new CalendarInterval(0, 0, -3000 * MICROS_PER_HOUR)
assert(IntervalUtils.toIso8601String(i9) === "PT-3000H")
}

test("to multi units string") {
val i1 = new CalendarInterval(0, 0, 0)
assert(IntervalUtils.toMultiUnitsString(i1) === "0 seconds")
val i2 = new CalendarInterval(34, 0, 0)
assert(IntervalUtils.toMultiUnitsString(i2) === "2 years 10 months")
val i3 = new CalendarInterval(-34, 0, 0)
assert(IntervalUtils.toMultiUnitsString(i3) === "-2 years -10 months")
val i4 = new CalendarInterval(0, 31, 0)
assert(IntervalUtils.toMultiUnitsString(i4) === "31 days")
val i5 = new CalendarInterval(0, -31, 0)
assert(IntervalUtils.toMultiUnitsString(i5) === "-31 days")
val i6 = new CalendarInterval(0, 0, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123)
assert(IntervalUtils.toMultiUnitsString(i6) === "3 hours 13 minutes 0.000123 seconds")
val i7 = new CalendarInterval(0, 0, -3 * MICROS_PER_HOUR - 13 * MICROS_PER_MINUTE - 123)
assert(IntervalUtils.toMultiUnitsString(i7) === "-3 hours -13 minutes -0.000123 seconds")
val i8 = new CalendarInterval(-34, 31, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123)
assert(IntervalUtils.toMultiUnitsString(i8) ===
"-2 years -10 months 31 days 3 hours 13 minutes 0.000123 seconds")
val i9 = new CalendarInterval(0, 0, -3000 * MICROS_PER_HOUR)
assert(IntervalUtils.toMultiUnitsString(i9) === "-3000 hours")
}

test("from day-time string") {
def check(input: String, from: IntervalUnit, to: IntervalUnit, expected: String): Unit = {
withClue(s"from = $from, to = $to") {
@@ -22,10 +22,8 @@ import java.sql.{Date, Timestamp}

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.IntervalUtils._
import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.IntervalStyle._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

@@ -75,12 +73,7 @@ object HiveResult {
case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString
case (n, _: NumericType) => n.toString
case (s: String, StringType) => if (nested) "\"" + s + "\"" else s
case (interval: CalendarInterval, CalendarIntervalType) =>
SQLConf.get.intervalOutputStyle match {
case SQL_STANDARD => toSqlStandardString(interval)
case ISO_8601 => toIso8601String(interval)
case MULTI_UNITS => toMultiUnitsString(interval)
}
case (interval: CalendarInterval, CalendarIntervalType) => interval.toString
case (seq: Seq[_], ArrayType(typ, _)) =>
seq.map(v => (v, typ)).map(e => toHiveString(e, true)).mkString("[", ",", "]")
case (m: Map[_, _], MapType(kType, vType, _)) =>

14 changes: 0 additions & 14 deletions sql/core/src/test/resources/sql-tests/inputs/interval-display.sql

This file was deleted.

@@ -272,12 +272,10 @@ SELECT interval '1 2:03:04' minute to second;
-- test output of couple non-standard interval values in the sql style
-- [SPARK-29406] Interval output styles
-- SET IntervalStyle TO sql_standard;
set spark.sql.intervalOutputStyle=SQL_STANDARD;
SELECT interval '1 day -1 hours',
interval '-1 days +1 hours',
interval '1 years 2 months -3 days 4 hours 5 minutes 6.789 seconds',
- interval '1 years 2 months -3 days 4 hours 5 minutes 6.789 seconds';
set spark.sql.intervalOutputStyle=MULTI_UNITS;
-- SELECT interval '1 day -1 hours',
-- interval '-1 days +1 hours',
-- interval '1 years 2 months -3 days 4 hours 5 minutes 6.789 seconds',
-- - interval '1 years 2 months -3 days 4 hours 5 minutes 6.789 seconds';

-- test outputting iso8601 intervals
-- [SPARK-29406] Interval output styles

