diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/dateExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/dateExpressions.scala
new file mode 100644
index 0000000000000..cf55826e81fba
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/dateExpressions.scala
@@ -0,0 +1,905 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import java.time.{LocalDate, ZoneId}
+import java.time.temporal.IsoFields
+import java.util.{Locale, TimeZone}
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen._
+import org.apache.spark.sql.catalyst.expressions.codegen.Block._
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.catalyst.util.DateTimeUtils._
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Common base class for time zone aware expressions.
+ *
+ * The time zone ID is optional. The derived `timeZone` and `zoneId` values unwrap it with
+ * `timeZoneId.get`, so they must only be accessed once the ID is defined.
+ */
+trait TimeZoneAwareExpression extends Expression {
+  /** The expression is only resolved when the time zone has been set. */
+  override lazy val resolved: Boolean =
+    childrenResolved && checkInputDataTypes().isSuccess && timeZoneId.isDefined
+
+  /** The time zone ID to be used to evaluate the value. */
+  def timeZoneId: Option[String]
+
+  /** Returns a copy of this expression with the specified timeZoneId. */
+  def withTimeZone(timeZoneId: String): TimeZoneAwareExpression
+
+  // Lazily derived from `timeZoneId`; marked @transient and recomputed on first access.
+  @transient lazy val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneId.get)
+  @transient lazy val zoneId: ZoneId = DateTimeUtils.getZoneId(timeZoneId.get)
+}
+
+/**
+ * Returns the current date at the start of query evaluation.
+ * All calls of current_date within the same query return the same value.
+ *
+ * There is no code generation since this expression should get constant folded by the optimizer.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_() - Returns the current date at the start of query evaluation.",
+  since = "1.5.0")
+case class CurrentDate(timeZoneId: Option[String] = None)
+  extends LeafExpression with TimeZoneAwareExpression with CodegenFallback {
+
+  // Zero-argument constructor: creates the expression with no time zone set.
+  def this() = this(None)
+
+  override def foldable: Boolean = true
+  override def nullable: Boolean = false
+
+  override def dataType: DataType = DateType
+
+  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
+    copy(timeZoneId = Option(timeZoneId))
+
+  // The input row is ignored; the date is computed for the zone given by `zoneId`.
+  override def eval(input: InternalRow): Any = currentDate(zoneId)
+
+  override def prettyName: String = "current_date"
+}
+
+/**
+ * Adds a number of days to startdate.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(start_date, num_days) - Returns the date that is `num_days` after `start_date`.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-07-30', 1);
+       2016-07-31
+  """,
+  since = "1.5.0")
+case class DateAdd(startDate: Expression, days: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+
+  override def left: Expression = startDate
+  override def right: Expression = days
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType)
+
+  override def dataType: DataType = DateType
+
+  // Both operands arrive as Int (the DateType input and the IntegerType day count),
+  // so the result is plain integer addition.
+  override def nullSafeEval(start: Any, d: Any): Any = {
+    start.asInstanceOf[Int] + d.asInstanceOf[Int]
+  }
+
+  // Generated code mirrors nullSafeEval: a single integer addition.
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (sd, d) => {
+      s"""${ev.value} = $sd + $d;"""
+    })
+  }
+
+  override def prettyName: String = "date_add"
+}
+
+/**
+ * Subtracts a number of days from startdate.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(start_date, num_days) - Returns the date that is `num_days` before `start_date`.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-07-30', 1);
+       2016-07-29
+  """,
+  since = "1.5.0")
+case class DateSub(startDate: Expression, days: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+  override def left: Expression = startDate
+  override def right: Expression = days
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType)
+
+  override def dataType: DataType = DateType
+
+  // Same representation as DateAdd: both operands are Int, so this is integer subtraction.
+  override def nullSafeEval(start: Any, d: Any): Any = {
+    start.asInstanceOf[Int] - d.asInstanceOf[Int]
+  }
+
+  // Generated code mirrors nullSafeEval: a single integer subtraction.
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (sd, d) => {
+      s"""${ev.value} = $sd - $d;"""
+    })
+  }
+
+  override def prettyName: String = "date_sub"
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the day of year of the date/timestamp.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-04-09');
+       100
+  """,
+  since = "1.5.0")
+case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getDayInYear(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    // Name of the DateTimeUtils object's class with the trailing "$" removed, so the
+    // generated Java can call the method as a static member.
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getDayInYear($c)")
+  }
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the year component of the date/timestamp.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-07-30');
+       2016
+  """,
+  since = "1.5.0")
+case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getYear(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getYear($c)")
+  }
+}
+
+/**
+ * Returns the value of `DateTimeUtils.getIsoYear` for the given date.
+ * It has no SQL function description of its own; `DatePart` maps the "ISOYEAR" field to it.
+ */
+case class IsoYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getIsoYear(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getIsoYear($c)")
+  }
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the quarter of the year for date, in the range 1 to 4.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-08-31');
+       3
+  """,
+  since = "1.5.0")
+case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getQuarter(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getQuarter($c)")
+  }
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the month component of the date/timestamp.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-07-30');
+       7
+  """,
+  since = "1.5.0")
+case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getMonth(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getMonth($c)")
+  }
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the day of month of the date/timestamp.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2009-07-30');
+       30
+  """,
+  since = "1.5.0")
+case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getDayOfMonth(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getDayOfMonth($c)")
+  }
+}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, ..., 7 = Saturday).",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2009-07-30');
+       5
+  """,
+  since = "2.3.0")
+// scalastyle:on line.size.limit
+case class DayOfWeek(child: Expression) extends DayWeek {
+
+  override protected def nullSafeEval(date: Any): Any = {
+    val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int])
+    // java.time.DayOfWeek numbers MONDAY as 1 and SUNDAY as 7; plus(1) advances one day
+    // (wrapping around), which yields the 1 = Sunday ... 7 = Saturday numbering.
+    localDate.getDayOfWeek.plus(1).getValue
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, days => {
+      s"""
+        ${ev.value} = java.time.LocalDate.ofEpochDay($days).getDayOfWeek().plus(1).getValue();
+      """
+    })
+  }
+}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2009-07-30');
+       3
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class WeekDay(child: Expression) extends DayWeek {
+
+  override protected def nullSafeEval(date: Any): Any = {
+    val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int])
+    // The enum ordinal is zero-based with MONDAY first, giving 0 = Monday ... 6 = Sunday.
+    localDate.getDayOfWeek.ordinal()
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, days => {
+      s"""
+        ${ev.value} = java.time.LocalDate.ofEpochDay($days).getDayOfWeek().ordinal();
+      """
+    })
+  }
+}
+
+/**
+ * Shared base of [[DayOfWeek]] and [[WeekDay]]: a unary expression that takes a date and
+ * returns an integer.
+ */
+abstract class DayWeek extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2008-02-20');
+       8
+  """,
+  since = "1.5.0")
+// scalastyle:on line.size.limit
+case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int])
+    // Uses the ISO week-based-year field from java.time.
+    localDate.get(IsoFields.WEEK_OF_WEEK_BASED_YEAR)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, days => {
+      s"""
+         |${ev.value} = java.time.LocalDate.ofEpochDay($days).get(
+         |  java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR);
+       """.stripMargin
+    })
+  }
+}
+
+/**
+ * Returns the last day of the month which the date belongs to.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(date) - Returns the last day of the month which the date belongs to.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2009-01-12');
+       2009-01-31
+  """,
+  since = "1.5.0")
+case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+  override def child: Expression = startDate
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = DateType
+
+  override def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getLastDayOfMonth(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, sd => s"$dtu.getLastDayOfMonth($sd)")
+  }
+
+  override def prettyName: String = "last_day"
+}
+
+/**
+ * Returns the first date which is later than startDate and named as dayOfWeek.
+ * For example, NextDay(2015-07-27, Sunday) would return 2015-08-02, which is the first
+ * Sunday later than 2015-07-27.
+ *
+ * Allowed "dayOfWeek" is defined in [[DateTimeUtils.getDayOfWeekFromString]].
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2015-01-14', 'TU');
+       2015-01-20
+  """,
+  since = "1.5.0")
+// scalastyle:on line.size.limit
+case class NextDay(startDate: Expression, dayOfWeek: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+
+  override def left: Expression = startDate
+  override def right: Expression = dayOfWeek
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType)
+
+  override def dataType: DataType = DateType
+  // Always nullable: a day-of-week string that cannot be parsed produces null
+  // even when both inputs are non-null.
+  override def nullable: Boolean = true
+
+  override def nullSafeEval(start: Any, dayOfW: Any): Any = {
+    val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String])
+    // -1 is the value checked here to detect an unrecognised day name.
+    if (dow == -1) {
+      null
+    } else {
+      val sd = start.asInstanceOf[Int]
+      DateTimeUtils.getNextDateForDayOfWeek(sd, dow)
+    }
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, (sd, dowS) => {
+      val dateTimeUtilClass = DateTimeUtils.getClass.getName.stripSuffix("$")
+      val dayOfWeekTerm = ctx.freshName("dayOfWeek")
+      if (dayOfWeek.foldable) {
+        // Foldable day name: parse it once while generating code and emit either a
+        // constant null result or a call with the day-of-week value inlined.
+        val input = dayOfWeek.eval().asInstanceOf[UTF8String]
+        if ((input eq null) || DateTimeUtils.getDayOfWeekFromString(input) == -1) {
+          s"""
+             |${ev.isNull} = true;
+           """.stripMargin
+        } else {
+          val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input)
+          s"""
+             |${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue);
+           """.stripMargin
+        }
+      } else {
+        // Non-foldable day name: the generated code parses the string for every row.
+        s"""
+           |int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS);
+           |if ($dayOfWeekTerm == -1) {
+           |  ${ev.isNull} = true;
+           |} else {
+           |  ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm);
+           |}
+         """.stripMargin
+      }
+    })
+  }
+
+  override def prettyName: String = "next_day"
+}
+
+/**
+ * Returns the date that is num_months after start_date.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(start_date, num_months) - Returns the date that is `num_months` after `start_date`.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2016-08-31', 1);
+       2016-09-30
+  """,
+  since = "1.5.0")
+// scalastyle:on line.size.limit
+case class AddMonths(startDate: Expression, numMonths: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+
+  override def left: Expression = startDate
+  override def right: Expression = numMonths
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType)
+
+  override def dataType: DataType = DateType
+
+  // The month arithmetic is delegated entirely to DateTimeUtils.dateAddMonths.
+  override def nullSafeEval(start: Any, months: Any): Any = {
+    DateTimeUtils.dateAddMonths(start.asInstanceOf[Int], months.asInstanceOf[Int])
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, (sd, m) => {
+      s"""$dtu.dateAddMonths($sd, $m)"""
+    })
+  }
+
+  override def prettyName: String = "add_months"
+}
+
+/**
+ * Parses a column to a date based on the given format.
+ */
+@ExpressionDescription(
+  usage = """
+    _FUNC_(date_str[, fmt]) - Parses the `date_str` expression with the `fmt` expression to
+      a date. Returns null with invalid input. By default, it follows casting rules to a date if
+      the `fmt` is omitted.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2009-07-30 04:17:52');
+       2009-07-30
+      > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
+       2016-12-31
+  """,
+  since = "1.5.0")
+case class ParseToDate(left: Expression, format: Option[Expression], child: Expression)
+  extends RuntimeReplaceable {
+
+  // With an explicit format: parse to a unix timestamp, then cast to timestamp and to date.
+  def this(left: Expression, format: Expression) {
+    this(left, Option(format),
+      Cast(Cast(UnixTimestamp(left, format), TimestampType), DateType))
+  }
+
+  def this(left: Expression) = {
+    // backwards compatibility
+    this(left, None, Cast(left, DateType))
+  }
+
+  override def flatArguments: Iterator[Any] = Iterator(left, format)
+  override def sql: String = {
+    if (format.isDefined) {
+      s"$prettyName(${left.sql}, ${format.get.sql})"
+    } else {
+      s"$prettyName(${left.sql})"
+    }
+  }
+
+  override def prettyName: String = "to_date"
+}
+
+/**
+ * Shared evaluation and code generation logic for expressions that truncate an instant
+ * (`instant`) to a level parsed from a string expression (`format`).
+ *
+ * The result is null when the parsed level is below the caller-supplied minimum level
+ * or when the instant itself evaluates to null, hence `nullable` is always true.
+ */
+trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes {
+  val instant: Expression
+  val format: Expression
+  override def nullable: Boolean = true
+
+  // Level of a foldable format, parsed once and reused by both eval and codegen.
+  // Only read on the `format.foldable` paths below, where no input row is required.
+  private lazy val truncLevel: Int =
+    DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String])
+
+  /**
+   * @param input internalRow (time)
+   * @param minLevel Minimum level that can be used for truncation (e.g WEEK for Date input)
+   * @param truncFunc function: (time, level) => time
+   * @return the truncated value, or null if the level is below `minLevel` or the instant is null
+   */
+  protected def evalHelper(input: InternalRow, minLevel: Int)(
+    truncFunc: (Any, Int) => Any): Any = {
+    val level = if (format.foldable) {
+      truncLevel
+    } else {
+      // A non-foldable format depends on the current row, so it must be evaluated
+      // against `input`; calling eval() without the row would not see the row's value.
+      DateTimeUtils.parseTruncLevel(format.eval(input).asInstanceOf[UTF8String])
+    }
+    if (level < minLevel) {
+      // unknown format or too small level
+      null
+    } else {
+      val t = instant.eval(input)
+      if (t == null) {
+        null
+      } else {
+        truncFunc(t, level)
+      }
+    }
+  }
+
+  /**
+   * Generates code equivalent to [[evalHelper]].
+   *
+   * @param ctx codegen context
+   * @param ev the expression code to fill in
+   * @param minLevel minimum level that can be used for truncation
+   * @param orderReversed true when the children are ordered (format, instant) rather than
+   *                      (instant, format); only consulted on the non-foldable path
+   * @param truncFunc builds the Java call text from (instant term, level term); the call is
+   *                  emitted as a member of the DateTimeUtils class
+   */
+  protected def codeGenHelper(
+      ctx: CodegenContext,
+      ev: ExprCode,
+      minLevel: Int,
+      orderReversed: Boolean = false)(
+      truncFunc: (String, String) => String)
+    : ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+
+    val javaType = CodeGenerator.javaType(dataType)
+    if (format.foldable) {
+      if (truncLevel < minLevel) {
+        // The level is known to be unusable at codegen time: emit a constant null.
+        ev.copy(code = code"""
+          boolean ${ev.isNull} = true;
+          $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)};""")
+      } else {
+        // The level is a compile-time constant, so only the instant is evaluated per row.
+        val t = instant.genCode(ctx)
+        val truncFuncStr = truncFunc(t.value, truncLevel.toString)
+        ev.copy(code = code"""
+          ${t.code}
+          boolean ${ev.isNull} = ${t.isNull};
+          $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)};
+          if (!${ev.isNull}) {
+            ${ev.value} = $dtu.$truncFuncStr;
+          }""")
+      }
+    } else {
+      nullSafeCodeGen(ctx, ev, (left, right) => {
+        val form = ctx.freshName("form")
+        val (dateVal, fmt) = if (orderReversed) {
+          (right, left)
+        } else {
+          (left, right)
+        }
+        val truncFuncStr = truncFunc(dateVal, form)
+        // No ';' is appended after the call below, so the text returned by `truncFunc`
+        // must already end with one.
+        s"""
+          int $form = $dtu.parseTruncLevel($fmt);
+          if ($form < $minLevel) {
+            ${ev.isNull} = true;
+          } else {
+            ${ev.value} = $dtu.$truncFuncStr
+          }
+        """
+      })
+    }
+  }
+}
+
+/**
+ * Returns date truncated to the unit specified by the format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.
+    `fmt` should be one of ["week", "mon", "month", "mm", "quarter", "year", "yyyy", "yy", "decade", "century", "millennium"]
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2019-08-04', 'week');
+       2019-07-29
+      > SELECT _FUNC_('2019-08-04', 'quarter');
+       2019-07-01
+      > SELECT _FUNC_('2009-02-12', 'MM');
+       2009-02-01
+      > SELECT _FUNC_('2015-10-27', 'YEAR');
+       2015-01-01
+      > SELECT _FUNC_('2015-10-27', 'DECADE');
+       2010-01-01
+      > SELECT _FUNC_('1981-01-19', 'century');
+       1901-01-01
+      > SELECT _FUNC_('1981-01-19', 'millennium');
+       1001-01-01
+  """,
+  since = "1.5.0")
+// scalastyle:on line.size.limit
+case class TruncDate(date: Expression, format: Expression)
+  extends TruncInstant {
+  override def left: Expression = date
+  override def right: Expression = format
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType)
+  override def dataType: DataType = DateType
+  override def prettyName: String = "trunc"
+  override val instant = date
+
+  // Null handling and level parsing live in TruncInstant.evalHelper; this only supplies
+  // the date-specific truncation. MIN_LEVEL_OF_DATE_TRUNC is not defined in this file
+  // (presumably it comes from the DateTimeUtils._ import).
+  override def eval(input: InternalRow): Any = {
+    evalHelper(input, minLevel = MIN_LEVEL_OF_DATE_TRUNC) { (d: Any, level: Int) =>
+      DateTimeUtils.truncDate(d.asInstanceOf[Int], level)
+    }
+  }
+
+  // The returned call text carries its own trailing ';'.
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    codeGenHelper(ctx, ev, minLevel = MIN_LEVEL_OF_DATE_TRUNC) {
+      (date: String, fmt: String) => s"truncDate($date, $fmt);"
+    }
+  }
+}
+
+/**
+ * Returns the number of days from startDate to endDate.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2009-07-31', '2009-07-30');
+       1
+
+      > SELECT _FUNC_('2009-07-30', '2009-07-31');
+       -1
+  """,
+  since = "1.5.0")
+case class DateDiff(endDate: Expression, startDate: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+
+  override def left: Expression = endDate
+  override def right: Expression = startDate
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)
+  override def dataType: DataType = IntegerType
+
+  // Both dates arrive as Int, so the difference in days is integer subtraction.
+  override def nullSafeEval(end: Any, start: Any): Any = {
+    end.asInstanceOf[Int] - start.asInstanceOf[Int]
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    defineCodeGen(ctx, ev, (end, start) => s"$end - $start")
+  }
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(year, month, day) - Create date from year, month and day fields.",
+  arguments = """
+    Arguments:
+      * year - the year to represent, from 1 to 9999
+      * month - the month-of-year to represent, from 1 (January) to 12 (December)
+      * day - the day-of-month to represent, from 1 to 31
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(2013, 7, 15);
+       2013-07-15
+      > SELECT _FUNC_(2019, 13, 1);
+       NULL
+      > SELECT _FUNC_(2019, 7, NULL);
+       NULL
+      > SELECT _FUNC_(2019, 2, 30);
+       NULL
+  """,
+  since = "3.0.0")
+case class MakeDate(year: Expression, month: Expression, day: Expression)
+  extends TernaryExpression with ImplicitCastInputTypes {
+
+  override def children: Seq[Expression] = Seq(year, month, day)
+  override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType)
+  override def dataType: DataType = DateType
+  // Nullable even for non-null inputs: field values rejected by LocalDate.of yield null.
+  override def nullable: Boolean = true
+
+  override def nullSafeEval(year: Any, month: Any, day: Any): Any = {
+    try {
+      val ld = LocalDate.of(year.asInstanceOf[Int], month.asInstanceOf[Int], day.asInstanceOf[Int])
+      localDateToDays(ld)
+    } catch {
+      // Invalid field combinations are reported as DateTimeException and mapped to null.
+      case _: java.time.DateTimeException => null
+    }
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    nullSafeCodeGen(ctx, ev, (year, month, day) => {
+      s"""
+      try {
+        ${ev.value} = $dtu.localDateToDays(java.time.LocalDate.of($year, $month, $day));
+      } catch (java.time.DateTimeException e) {
+        ${ev.isNull} = true;
+      }"""
+    })
+  }
+
+  override def prettyName: String = "make_date"
+}
+
+/**
+ * Returns the value of `DateTimeUtils.getMillennium` for the given date.
+ * Reached through the "MILLENNIUM" field of `DatePart`.
+ */
+case class Millennium(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getMillennium(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getMillennium($c)")
+  }
+}
+
+/**
+ * Returns the value of `DateTimeUtils.getCentury` for the given date.
+ * Reached through the "CENTURY" field of `DatePart`.
+ */
+case class Century(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getCentury(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getCentury($c)")
+  }
+}
+
+/**
+ * Returns the value of `DateTimeUtils.getDecade` for the given date.
+ * Reached through the "DECADE" field of `DatePart`.
+ */
+case class Decade(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+
+  override def dataType: DataType = IntegerType
+
+  override protected def nullSafeEval(date: Any): Any = {
+    DateTimeUtils.getDecade(date.asInstanceOf[Int])
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+    defineCodeGen(ctx, ev, c => s"$dtu.getDecade($c)")
+  }
+}
+
+object DatePart {
+
+  /**
+   * Maps a field name to the expression that extracts that field from `source`.
+   *
+   * @param extractField field name; matched case-insensitively (upper-cased with Locale.ROOT)
+   * @param source the date/timestamp expression to extract from
+   * @param errorHandleFunc by-name handler evaluated only for an unrecognised field;
+   *                        typed `Nothing`, so it never returns normally
+   */
+  def parseExtractField(
+      extractField: String,
+      source: Expression,
+      errorHandleFunc: => Nothing): Expression = extractField.toUpperCase(Locale.ROOT) match {
+    case "MILLENNIUM" | "MILLENNIA" | "MIL" | "MILS" => Millennium(source)
+    case "CENTURY" | "CENTURIES" | "C" | "CENT" => Century(source)
+    case "DECADE" | "DECADES" | "DEC" | "DECS" => Decade(source)
+    case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => Year(source)
+    case "ISOYEAR" => IsoYear(source)
+    case "QUARTER" | "QTR" => Quarter(source)
+    case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source)
+    case "WEEK" | "W" | "WEEKS" => WeekOfYear(source)
+    case "DAY" | "D" | "DAYS" => DayOfMonth(source)
+    case "DAYOFWEEK" => DayOfWeek(source)
+    // DayOfWeek is 1-based starting on Sunday; DOW shifts it to 0-based.
+    case "DOW" => Subtract(DayOfWeek(source), Literal(1))
+    // WeekDay is 0-based starting on Monday; ISODOW shifts it to 1-based.
+    case "ISODOW" => Add(WeekDay(source), Literal(1))
+    case "DOY" => DayOfYear(source)
+    case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => Hour(source)
+    case "MINUTE" | "M" | "MIN" | "MINS" | "MINUTES" => Minute(source)
+    case "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => SecondWithFraction(source)
+    case "MILLISECONDS" | "MSEC" | "MSECS" | "MILLISECON" | "MSECONDS" | "MS" =>
+      Milliseconds(source)
+    case "MICROSECONDS" | "USEC" | "USECS" | "USECONDS" | "MICROSECON" | "US" =>
+      Microseconds(source)
+    case "EPOCH" => Epoch(source)
+    case _ => errorHandleFunc
+  }
+}
+
+@ExpressionDescription(
+  usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp.",
+  arguments = """
+    Arguments:
+      * field - selects which part of the source should be extracted. Supported string values are:
+                ["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"),
+                 "CENTURY", ("CENTURIES", "C", "CENT"),
+                 "DECADE", ("DECADES", "DEC", "DECS"),
+                 "YEAR", ("Y", "YEARS", "YR", "YRS"),
+                 "ISOYEAR",
+                 "QUARTER", ("QTR"),
+                 "MONTH", ("MON", "MONS", "MONTHS"),
+                 "WEEK", ("W", "WEEKS"),
+                 "DAY", ("D", "DAYS"),
+                 "DAYOFWEEK",
+                 "DOW",
+                 "ISODOW",
+                 "DOY",
+                 "HOUR", ("H", "HOURS", "HR", "HRS"),
+                 "MINUTE", ("M", "MIN", "MINS", "MINUTES"),
+                 "SECOND", ("S", "SEC", "SECONDS", "SECS"),
+                 "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"),
+                 "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"),
+                 "EPOCH"]
+      * source - a date (or timestamp) column from where `field` should be extracted
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');
+       2019
+      > SELECT _FUNC_('week', timestamp'2019-08-12 01:00:00.123456');
+       33
+      > SELECT _FUNC_('doy', DATE'2019-08-12');
+       224
+      > SELECT _FUNC_('SECONDS', timestamp'2019-10-01 00:00:01.000001');
+       1.000001
+  """,
+  since = "3.0.0")
+case class DatePart(field: Expression, source: Expression, child: Expression)
+  extends RuntimeReplaceable {
+
+  // Resolves `field` while the expression is being constructed: it must be foldable,
+  // a null field yields a null literal, and an unknown field name raises AnalysisException.
+  def this(field: Expression, source: Expression) {
+    this(field, source, {
+      if (!field.foldable) {
+        throw new AnalysisException("The field parameter needs to be a foldable string value.")
+      }
+      val fieldEval = field.eval()
+      if (fieldEval == null) {
+        Literal(null, DoubleType)
+      } else {
+        val fieldStr = fieldEval.asInstanceOf[UTF8String].toString
+        DatePart.parseExtractField(fieldStr, source, {
+          throw new AnalysisException(s"Literals of type '$fieldStr' are currently not supported.")
+        })
+      }
+    })
+  }
+
+  override def flatArguments: Iterator[Any] = Iterator(field, source)
+  override def sql: String = s"$prettyName(${field.sql}, ${source.sql})"
+  override def prettyName: String = "date_part"
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timestampExpressions.scala similarity index 62% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timestampExpressions.scala index 5aea884ad5003..9db1f5e899942 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timestampExpressions.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp -import java.time.{DateTimeException, Instant, LocalDate, LocalDateTime, ZoneId} -import java.time.temporal.IsoFields +import java.time.{DateTimeException, LocalDateTime, ZoneId} import java.util.{Locale, TimeZone} import scala.util.control.NonFatal @@ -36,51 +35,6 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} -/** - * Common base class for time zone aware expressions. - */ -trait TimeZoneAwareExpression extends Expression { - /** The expression is only resolved when the time zone has been set. */ - override lazy val resolved: Boolean = - childrenResolved && checkInputDataTypes().isSuccess && timeZoneId.isDefined - - /** the timezone ID to be used to evaluate value. */ - def timeZoneId: Option[String] - - /** Returns a copy of this expression with the specified timeZoneId. */ - def withTimeZone(timeZoneId: String): TimeZoneAwareExpression - - @transient lazy val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneId.get) - @transient lazy val zoneId: ZoneId = DateTimeUtils.getZoneId(timeZoneId.get) -} - -/** - * Returns the current date at the start of query evaluation. - * All calls of current_date within the same query return the same value. 
- * - * There is no code generation since this expression should get constant folded by the optimizer. - */ -@ExpressionDescription( - usage = "_FUNC_() - Returns the current date at the start of query evaluation.", - since = "1.5.0") -case class CurrentDate(timeZoneId: Option[String] = None) - extends LeafExpression with TimeZoneAwareExpression with CodegenFallback { - - def this() = this(None) - - override def foldable: Boolean = true - override def nullable: Boolean = false - - override def dataType: DataType = DateType - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - override def eval(input: InternalRow): Any = currentDate(zoneId) - - override def prettyName: String = "current_date" -} - /** * Returns the current timestamp at the start of query evaluation. * All calls of current_timestamp within the same query return the same value. @@ -138,73 +92,6 @@ case class CurrentBatchTimestamp( } } -/** - * Adds a number of days to startdate. - */ -@ExpressionDescription( - usage = "_FUNC_(start_date, num_days) - Returns the date that is `num_days` after `start_date`.", - examples = """ - Examples: - > SELECT _FUNC_('2016-07-30', 1); - 2016-07-31 - """, - since = "1.5.0") -case class DateAdd(startDate: Expression, days: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - - override def left: Expression = startDate - override def right: Expression = days - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType) - - override def dataType: DataType = DateType - - override def nullSafeEval(start: Any, d: Any): Any = { - start.asInstanceOf[Int] + d.asInstanceOf[Int] - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (sd, d) => { - s"""${ev.value} = $sd + $d;""" - }) - } - - override def prettyName: String = "date_add" -} - -/** - * Subtracts a number of days to startdate. 
- */ -@ExpressionDescription( - usage = "_FUNC_(start_date, num_days) - Returns the date that is `num_days` before `start_date`.", - examples = """ - Examples: - > SELECT _FUNC_('2016-07-30', 1); - 2016-07-29 - """, - since = "1.5.0") -case class DateSub(startDate: Expression, days: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = startDate - override def right: Expression = days - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType) - - override def dataType: DataType = DateType - - override def nullSafeEval(start: Any, d: Any): Any = { - start.asInstanceOf[Int] - d.asInstanceOf[Int] - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (sd, d) => { - s"""${ev.value} = $sd - $d;""" - }) - } - - override def prettyName: String = "date_sub" -} - @ExpressionDescription( usage = "_FUNC_(timestamp) - Returns the hour component of the string/timestamp.", examples = """ @@ -364,232 +251,6 @@ case class Microseconds(child: Expression, timeZoneId: Option[String] = None) } } -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the day of year of the date/timestamp.", - examples = """ - Examples: - > SELECT _FUNC_('2016-04-09'); - 100 - """, - since = "1.5.0") -case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getDayInYear(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getDayInYear($c)") - } -} - -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the year component of the date/timestamp.", - examples = """ - Examples: - > SELECT 
_FUNC_('2016-07-30'); - 2016 - """, - since = "1.5.0") -case class Year(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getYear(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getYear($c)") - } -} - -case class IsoYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getIsoYear(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getIsoYear($c)") - } -} - -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the quarter of the year for date, in the range 1 to 4.", - examples = """ - Examples: - > SELECT _FUNC_('2016-08-31'); - 3 - """, - since = "1.5.0") -case class Quarter(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getQuarter(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getQuarter($c)") - } -} - -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the month component of the date/timestamp.", - examples = """ - Examples: - > SELECT 
_FUNC_('2016-07-30'); - 7 - """, - since = "1.5.0") -case class Month(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getMonth(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getMonth($c)") - } -} - -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the day of month of the date/timestamp.", - examples = """ - Examples: - > SELECT _FUNC_('2009-07-30'); - 30 - """, - since = "1.5.0") -case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getDayOfMonth(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getDayOfMonth($c)") - } -} - -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, ..., 7 = Saturday).", - examples = """ - Examples: - > SELECT _FUNC_('2009-07-30'); - 5 - """, - since = "2.3.0") -// scalastyle:on line.size.limit -case class DayOfWeek(child: Expression) extends DayWeek { - - override protected def nullSafeEval(date: Any): Any = { - val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int]) - localDate.getDayOfWeek.plus(1).getValue - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, days => { - s""" - ${ev.value} = 
java.time.LocalDate.ofEpochDay($days).getDayOfWeek().plus(1).getValue(); - """ - }) - } -} - -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).", - examples = """ - Examples: - > SELECT _FUNC_('2009-07-30'); - 3 - """, - since = "2.4.0") -// scalastyle:on line.size.limit -case class WeekDay(child: Expression) extends DayWeek { - - override protected def nullSafeEval(date: Any): Any = { - val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int]) - localDate.getDayOfWeek.ordinal() - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, days => { - s""" - ${ev.value} = java.time.LocalDate.ofEpochDay($days).getDayOfWeek().ordinal(); - """ - }) - } -} - -abstract class DayWeek extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType -} - -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the week of the year of the given date. 
A week is considered to start on a Monday and week 1 is the first week with >3 days.", - examples = """ - Examples: - > SELECT _FUNC_('2008-02-20'); - 8 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - val localDate = LocalDate.ofEpochDay(date.asInstanceOf[Int]) - localDate.get(IsoFields.WEEK_OF_WEEK_BASED_YEAR) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, days => { - s""" - |${ev.value} = java.time.LocalDate.ofEpochDay($days).get( - | java.time.temporal.IsoFields.WEEK_OF_WEEK_BASED_YEAR); - """.stripMargin - }) - } -} - // scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(timestamp, fmt) - Converts `timestamp` to a value of string in the format specified by the date format `fmt`.", @@ -647,459 +308,38 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti } /** - * Converts time string with given pattern. - * Deterministic version of [[UnixTimestamp]], must have at least one parameter. + * Adds an interval to timestamp. 
*/ -@ExpressionDescription( - usage = "_FUNC_(expr[, pattern]) - Returns the UNIX timestamp of the given time.", - examples = """ - Examples: - > SELECT _FUNC_('2016-04-08', 'yyyy-MM-dd'); - 1460098800 - """, - since = "1.6.0") -case class ToUnixTimestamp( - timeExp: Expression, - format: Expression, - timeZoneId: Option[String] = None) - extends UnixTime { +case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[String] = None) + extends BinaryExpression with TimeZoneAwareExpression with ImplicitCastInputTypes { - def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) + def this(start: Expression, interval: Expression) = this(start, interval, None) - override def left: Expression = timeExp - override def right: Expression = format + override def left: Expression = start + override def right: Expression = interval + + override def toString: String = s"$left + $right" + override def sql: String = s"${left.sql} + ${right.sql}" + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, CalendarIntervalType) + + override def dataType: DataType = TimestampType override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - def this(time: Expression) = { - this(time, Literal("uuuu-MM-dd HH:mm:ss")) + override def nullSafeEval(start: Any, interval: Any): Any = { + val itvl = interval.asInstanceOf[CalendarInterval] + DateTimeUtils.timestampAddInterval( + start.asInstanceOf[Long], itvl.months, itvl.microseconds, zoneId) } - override def prettyName: String = "to_unix_timestamp" -} - -/** - * Converts time string with given pattern to Unix time stamp (in seconds), returns null if fail. - * See [https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html]. - * Note that hive Language Manual says it returns 0 if fail, but in fact it returns null. - * If the second parameter is missing, use "uuuu-MM-dd HH:mm:ss". 
- * If no parameters provided, the first parameter will be current_timestamp. - * If the first parameter is a Date or Timestamp instead of String, we will ignore the - * second parameter. - */ -@ExpressionDescription( - usage = "_FUNC_([expr[, pattern]]) - Returns the UNIX timestamp of current or specified time.", - examples = """ - Examples: - > SELECT _FUNC_(); - 1476884637 - > SELECT _FUNC_('2016-04-08', 'yyyy-MM-dd'); - 1460041200 - """, - since = "1.5.0") -case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Option[String] = None) - extends UnixTime { - - def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) - - override def left: Expression = timeExp - override def right: Expression = format - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - def this(time: Expression) = { - this(time, Literal("uuuu-MM-dd HH:mm:ss")) - } - - def this() = { - this(CurrentTimestamp()) - } - - override def prettyName: String = "unix_timestamp" -} - -abstract class ToTimestamp - extends BinaryExpression with TimeZoneAwareExpression with ExpectsInputTypes { - - // The result of the conversion to timestamp is microseconds divided by this factor. - // For example if the factor is 1000000, the result of the expression is in seconds. 
- protected def downScaleFactor: Long - - override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(StringType, DateType, TimestampType), StringType) - - override def dataType: DataType = LongType - override def nullable: Boolean = true - - private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] - private lazy val formatter: TimestampFormatter = - try { - TimestampFormatter(constFormat.toString, zoneId) - } catch { - case NonFatal(_) => null - } - - override def eval(input: InternalRow): Any = { - val t = left.eval(input) - if (t == null) { - null - } else { - left.dataType match { - case DateType => - epochDaysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor - case TimestampType => - t.asInstanceOf[Long] / downScaleFactor - case StringType if right.foldable => - if (constFormat == null || formatter == null) { - null - } else { - try { - formatter.parse( - t.asInstanceOf[UTF8String].toString) / downScaleFactor - } catch { - case NonFatal(_) => null - } - } - case StringType => - val f = right.eval(input) - if (f == null) { - null - } else { - val formatString = f.asInstanceOf[UTF8String].toString - try { - TimestampFormatter(formatString, zoneId).parse( - t.asInstanceOf[UTF8String].toString) / downScaleFactor - } catch { - case NonFatal(_) => null - } - } - } - } - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val javaType = CodeGenerator.javaType(dataType) - left.dataType match { - case StringType if right.foldable => - val df = classOf[TimestampFormatter].getName - if (formatter == null) { - ExprCode.forNullValue(dataType) - } else { - val formatterName = ctx.addReferenceObj("formatter", formatter, df) - val eval1 = left.genCode(ctx) - ev.copy(code = code""" - ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; - $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (!${ev.isNull}) { - try { - ${ev.value} = $formatterName.parse(${eval1.value}.toString()) / 
$downScaleFactor; - } catch (java.lang.IllegalArgumentException e) { - ${ev.isNull} = true; - } catch (java.text.ParseException e) { - ${ev.isNull} = true; - } catch (java.time.format.DateTimeParseException e) { - ${ev.isNull} = true; - } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; - } - }""") - } - case StringType => - val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) - val locale = ctx.addReferenceObj("locale", Locale.US) - val tf = TimestampFormatter.getClass.getName.stripSuffix("$") - nullSafeCodeGen(ctx, ev, (string, format) => { - s""" - try { - ${ev.value} = $tf$$.MODULE$$.apply($format.toString(), $zid, $locale) - .parse($string.toString()) / $downScaleFactor; - } catch (java.lang.IllegalArgumentException e) { - ${ev.isNull} = true; - } catch (java.text.ParseException e) { - ${ev.isNull} = true; - } catch (java.time.format.DateTimeParseException e) { - ${ev.isNull} = true; - } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; - } - """ - }) - case TimestampType => - val eval1 = left.genCode(ctx) - ev.copy(code = code""" - ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; - $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (!${ev.isNull}) { - ${ev.value} = ${eval1.value} / $downScaleFactor; - }""") - case DateType => - val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - val eval1 = left.genCode(ctx) - ev.copy(code = code""" - ${eval1.code} - boolean ${ev.isNull} = ${eval1.isNull}; - $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (!${ev.isNull}) { - ${ev.value} = $dtu.epochDaysToMicros(${eval1.value}, $zid) / $downScaleFactor; - }""") - } - } -} - -abstract class UnixTime extends ToTimestamp { - override val downScaleFactor: Long = MICROS_PER_SECOND -} - -/** - * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string - * representing the 
timestamp of that moment in the current system time zone in the given - * format. If the format is missing, using format like "1970-01-01 00:00:00". - * Note that hive Language Manual says it returns 0 if fail, but in fact it returns null. - */ -@ExpressionDescription( - usage = "_FUNC_(unix_time, format) - Returns `unix_time` in the specified `format`.", - examples = """ - Examples: - > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss'); - 1969-12-31 16:00:00 - """, - since = "1.5.0") -case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[String] = None) - extends BinaryExpression with TimeZoneAwareExpression with ImplicitCastInputTypes { - - def this(sec: Expression, format: Expression) = this(sec, format, None) - - override def left: Expression = sec - override def right: Expression = format - - override def prettyName: String = "from_unixtime" - - def this(unix: Expression) = { - this(unix, Literal("uuuu-MM-dd HH:mm:ss")) - } - - override def dataType: DataType = StringType - override def nullable: Boolean = true - - override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType) - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] - private lazy val formatter: TimestampFormatter = - try { - TimestampFormatter(constFormat.toString, zoneId) - } catch { - case NonFatal(_) => null - } - - override def eval(input: InternalRow): Any = { - val time = left.eval(input) - if (time == null) { - null - } else { - if (format.foldable) { - if (constFormat == null || formatter == null) { - null - } else { - try { - UTF8String.fromString(formatter.format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) - } catch { - case NonFatal(_) => null - } - } - } else { - val f = format.eval(input) - if (f == null) { - null - } else { - try { - UTF8String.fromString(TimestampFormatter(f.toString, zoneId) - 
.format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) - } catch { - case NonFatal(_) => null - } - } - } - } - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val df = classOf[TimestampFormatter].getName - if (format.foldable) { - if (formatter == null) { - ExprCode.forNullValue(StringType) - } else { - val formatterName = ctx.addReferenceObj("formatter", formatter, df) - val t = left.genCode(ctx) - ev.copy(code = code""" - ${t.code} - boolean ${ev.isNull} = ${t.isNull}; - ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (!${ev.isNull}) { - try { - ${ev.value} = UTF8String.fromString($formatterName.format(${t.value} * 1000000L)); - } catch (java.lang.IllegalArgumentException e) { - ${ev.isNull} = true; - } - }""") - } - } else { - val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) - val locale = ctx.addReferenceObj("locale", Locale.US) - val tf = TimestampFormatter.getClass.getName.stripSuffix("$") - nullSafeCodeGen(ctx, ev, (seconds, f) => { - s""" - try { - ${ev.value} = UTF8String.fromString($tf$$.MODULE$$.apply($f.toString(), $zid, $locale). - format($seconds * 1000000L)); - } catch (java.lang.IllegalArgumentException e) { - ${ev.isNull} = true; - }""" - }) - } - } -} - -/** - * Returns the last day of the month which the date belongs to. 
- */ -@ExpressionDescription( - usage = "_FUNC_(date) - Returns the last day of the month which the date belongs to.", - examples = """ - Examples: - > SELECT _FUNC_('2009-01-12'); - 2009-01-31 - """, - since = "1.5.0") -case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitCastInputTypes { - override def child: Expression = startDate - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = DateType - - override def nullSafeEval(date: Any): Any = { - DateTimeUtils.getLastDayOfMonth(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, sd => s"$dtu.getLastDayOfMonth($sd)") - } - - override def prettyName: String = "last_day" -} - -/** - * Returns the first date which is later than startDate and named as dayOfWeek. - * For example, NextDay(2015-07-27, Sunday) would return 2015-08-02, which is the first - * Sunday later than 2015-07-27. - * - * Allowed "dayOfWeek" is defined in [[DateTimeUtils.getDayOfWeekFromString]]. 
- */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated.", - examples = """ - Examples: - > SELECT _FUNC_('2015-01-14', 'TU'); - 2015-01-20 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class NextDay(startDate: Expression, dayOfWeek: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - - override def left: Expression = startDate - override def right: Expression = dayOfWeek - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - - override def dataType: DataType = DateType - override def nullable: Boolean = true - - override def nullSafeEval(start: Any, dayOfW: Any): Any = { - val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String]) - if (dow == -1) { - null - } else { - val sd = start.asInstanceOf[Int] - DateTimeUtils.getNextDateForDayOfWeek(sd, dow) - } - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (sd, dowS) => { - val dateTimeUtilClass = DateTimeUtils.getClass.getName.stripSuffix("$") - val dayOfWeekTerm = ctx.freshName("dayOfWeek") - if (dayOfWeek.foldable) { - val input = dayOfWeek.eval().asInstanceOf[UTF8String] - if ((input eq null) || DateTimeUtils.getDayOfWeekFromString(input) == -1) { - s""" - |${ev.isNull} = true; - """.stripMargin - } else { - val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input) - s""" - |${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue); - """.stripMargin - } - } else { - s""" - |int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); - |if ($dayOfWeekTerm == -1) { - | ${ev.isNull} = true; - |} else { - | ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); - |} - """.stripMargin - } - }) - } - - override def prettyName: String = "next_day" -} - -/** - * Adds an interval 
to timestamp. - */ -case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[String] = None) - extends BinaryExpression with TimeZoneAwareExpression with ImplicitCastInputTypes { - - def this(start: Expression, interval: Expression) = this(start, interval, None) - - override def left: Expression = start - override def right: Expression = interval - - override def toString: String = s"$left + $right" - override def sql: String = s"${left.sql} + ${right.sql}" - override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, CalendarIntervalType) - - override def dataType: DataType = TimestampType - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) - - override def nullSafeEval(start: Any, interval: Any): Any = { - val itvl = interval.asInstanceOf[CalendarInterval] - DateTimeUtils.timestampAddInterval( - start.asInstanceOf[Long], itvl.months, itvl.microseconds, zoneId) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, (sd, i) => { - s"""$dtu.timestampAddInterval($sd, $i.months, $i.microseconds, $zid)""" - }) - } + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (sd, i) => { + s"""$dtu.timestampAddInterval($sd, $i.months, $i.microseconds, $zid)""" + }) + } } /** @@ -1217,43 +457,6 @@ case class TimeSub(start: Expression, interval: Expression, timeZoneId: Option[S } } -/** - * Returns the date that is num_months after start_date. 
- */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(start_date, num_months) - Returns the date that is `num_months` after `start_date`.", - examples = """ - Examples: - > SELECT _FUNC_('2016-08-31', 1); - 2016-09-30 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class AddMonths(startDate: Expression, numMonths: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - - override def left: Expression = startDate - override def right: Expression = numMonths - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType) - - override def dataType: DataType = DateType - - override def nullSafeEval(start: Any, months: Any): Any = { - DateTimeUtils.dateAddMonths(start.asInstanceOf[Int], months.asInstanceOf[Int]) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, (sd, m) => { - s"""$dtu.dateAddMonths($sd, $m)""" - }) - } - - override def prettyName: String = "add_months" -} - /** * Returns number of months between times `timestamp1` and `timestamp2`. * If `timestamp1` is later than `timestamp2`, then the result is positive. @@ -1387,55 +590,13 @@ case class ToUTCTimestamp(left: Expression, right: Expression) | ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm); |} """.stripMargin) - } - } else { - defineCodeGen(ctx, ev, (timestamp, format) => { - s"""$dtu.toUTCTime($timestamp, $format.toString())""" - }) - } - } -} - -/** - * Parses a column to a date based on the given format. - */ -@ExpressionDescription( - usage = """ - _FUNC_(date_str[, fmt]) - Parses the `date_str` expression with the `fmt` expression to - a date. Returns null with invalid input. By default, it follows casting rules to a date if - the `fmt` is omitted. 
- """, - examples = """ - Examples: - > SELECT _FUNC_('2009-07-30 04:17:52'); - 2009-07-30 - > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd'); - 2016-12-31 - """, - since = "1.5.0") -case class ParseToDate(left: Expression, format: Option[Expression], child: Expression) - extends RuntimeReplaceable { - - def this(left: Expression, format: Expression) { - this(left, Option(format), - Cast(Cast(UnixTimestamp(left, format), TimestampType), DateType)) - } - - def this(left: Expression) = { - // backwards compatibility - this(left, None, Cast(left, DateType)) - } - - override def flatArguments: Iterator[Any] = Iterator(left, format) - override def sql: String = { - if (format.isDefined) { - s"$prettyName(${left.sql}, ${format.get.sql})" + } } else { - s"$prettyName(${left.sql})" + defineCodeGen(ctx, ev, (timestamp, format) => { + s"""$dtu.toUTCTime($timestamp, $format.toString())""" + }) } } - - override def prettyName: String = "to_date" } /** @@ -1477,138 +638,6 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { - val instant: Expression - val format: Expression - override def nullable: Boolean = true - - private lazy val truncLevel: Int = - DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - - /** - * @param input internalRow (time) - * @param minLevel Minimum level that can be used for truncation (e.g WEEK for Date input) - * @param truncFunc function: (time, level) => time - */ - protected def evalHelper(input: InternalRow, minLevel: Int)( - truncFunc: (Any, Int) => Any): Any = { - val level = if (format.foldable) { - truncLevel - } else { - DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - } - if (level < minLevel) { - // unknown format or too small level - null - } else { - val t = instant.eval(input) - if (t == null) { - null - } else { - truncFunc(t, level) - } - } - } 
- - protected def codeGenHelper( - ctx: CodegenContext, - ev: ExprCode, - minLevel: Int, - orderReversed: Boolean = false)( - truncFunc: (String, String) => String) - : ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - - val javaType = CodeGenerator.javaType(dataType) - if (format.foldable) { - if (truncLevel < minLevel) { - ev.copy(code = code""" - boolean ${ev.isNull} = true; - $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)};""") - } else { - val t = instant.genCode(ctx) - val truncFuncStr = truncFunc(t.value, truncLevel.toString) - ev.copy(code = code""" - ${t.code} - boolean ${ev.isNull} = ${t.isNull}; - $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (!${ev.isNull}) { - ${ev.value} = $dtu.$truncFuncStr; - }""") - } - } else { - nullSafeCodeGen(ctx, ev, (left, right) => { - val form = ctx.freshName("form") - val (dateVal, fmt) = if (orderReversed) { - (right, left) - } else { - (left, right) - } - val truncFuncStr = truncFunc(dateVal, form) - s""" - int $form = $dtu.parseTruncLevel($fmt); - if ($form < $minLevel) { - ${ev.isNull} = true; - } else { - ${ev.value} = $dtu.$truncFuncStr - } - """ - }) - } - } -} - -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = """ - _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`. 
- `fmt` should be one of ["week", "mon", "month", "mm", "quarter", "year", "yyyy", "yy", "decade", "century", "millennium"] - """, - examples = """ - Examples: - > SELECT _FUNC_('2019-08-04', 'week'); - 2019-07-29 - > SELECT _FUNC_('2019-08-04', 'quarter'); - 2019-07-01 - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - > SELECT _FUNC_('2015-10-27', 'DECADE'); - 2010-01-01 - > SELECT _FUNC_('1981-01-19', 'century'); - 1901-01-01 - > SELECT _FUNC_('1981-01-19', 'millennium'); - 1001-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends TruncInstant { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType - override def prettyName: String = "trunc" - override val instant = date - - override def eval(input: InternalRow): Any = { - evalHelper(input, minLevel = MIN_LEVEL_OF_DATE_TRUNC) { (d: Any, level: Int) => - DateTimeUtils.truncDate(d.asInstanceOf[Int], level) - } - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - codeGenHelper(ctx, ev, minLevel = MIN_LEVEL_OF_DATE_TRUNC) { - (date: String, fmt: String) => s"truncDate($date, $fmt);" - } - } -} - /** * Returns timestamp truncated to the unit specified by the format. 
*/ @@ -1637,137 +666,375 @@ case class TruncDate(date: Expression, format: Expression) > SELECT _FUNC_('CENTURY', '2015-03-05T09:32:05.123456'); 2001-01-01 00:00:00 """, - since = "2.3.0") -// scalastyle:on line.size.limit -case class TruncTimestamp( + since = "2.3.0") +// scalastyle:on line.size.limit +case class TruncTimestamp( + format: Expression, + timestamp: Expression, + timeZoneId: Option[String] = None) + extends TruncInstant with TimeZoneAwareExpression { + override def left: Expression = format + override def right: Expression = timestamp + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType) + override def dataType: TimestampType = TimestampType + override def prettyName: String = "date_trunc" + override val instant = timestamp + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + def this(format: Expression, timestamp: Expression) = this(format, timestamp, None) + + override def eval(input: InternalRow): Any = { + evalHelper(input, minLevel = MIN_LEVEL_OF_TIMESTAMP_TRUNC) { (t: Any, level: Int) => + DateTimeUtils.truncTimestamp(t.asInstanceOf[Long], level, timeZone) + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val tz = ctx.addReferenceObj("timeZone", timeZone) + codeGenHelper(ctx, ev, minLevel = MIN_LEVEL_OF_TIMESTAMP_TRUNC, true) { + (date: String, fmt: String) => + s"truncTimestamp($date, $fmt, $tz);" + } + } +} + +abstract class ToTimestamp + extends BinaryExpression with TimeZoneAwareExpression with ExpectsInputTypes { + + // The result of the conversion to timestamp is microseconds divided by this factor. + // For example if the factor is 1000000, the result of the expression is in seconds. 
+ protected def downScaleFactor: Long + + override def inputTypes: Seq[AbstractDataType] = + Seq(TypeCollection(StringType, DateType, TimestampType), StringType) + + override def dataType: DataType = LongType + override def nullable: Boolean = true + + private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] + private lazy val formatter: TimestampFormatter = + try { + TimestampFormatter(constFormat.toString, zoneId) + } catch { + case NonFatal(_) => null + } + + override def eval(input: InternalRow): Any = { + val t = left.eval(input) + if (t == null) { + null + } else { + left.dataType match { + case DateType => + epochDaysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor + case TimestampType => + t.asInstanceOf[Long] / downScaleFactor + case StringType if right.foldable => + if (constFormat == null || formatter == null) { + null + } else { + try { + formatter.parse( + t.asInstanceOf[UTF8String].toString) / downScaleFactor + } catch { + case NonFatal(_) => null + } + } + case StringType => + val f = right.eval(input) + if (f == null) { + null + } else { + val formatString = f.asInstanceOf[UTF8String].toString + try { + TimestampFormatter(formatString, zoneId).parse( + t.asInstanceOf[UTF8String].toString) / downScaleFactor + } catch { + case NonFatal(_) => null + } + } + } + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val javaType = CodeGenerator.javaType(dataType) + left.dataType match { + case StringType if right.foldable => + val df = classOf[TimestampFormatter].getName + if (formatter == null) { + ExprCode.forNullValue(dataType) + } else { + val formatterName = ctx.addReferenceObj("formatter", formatter, df) + val eval1 = left.genCode(ctx) + ev.copy(code = code""" + ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; + $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + if (!${ev.isNull}) { + try { + ${ev.value} = $formatterName.parse(${eval1.value}.toString()) / 
$downScaleFactor; + } catch (java.lang.IllegalArgumentException e) { + ${ev.isNull} = true; + } catch (java.text.ParseException e) { + ${ev.isNull} = true; + } catch (java.time.format.DateTimeParseException e) { + ${ev.isNull} = true; + } catch (java.time.DateTimeException e) { + ${ev.isNull} = true; + } + }""") + } + case StringType => + val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + val locale = ctx.addReferenceObj("locale", Locale.US) + val tf = TimestampFormatter.getClass.getName.stripSuffix("$") + nullSafeCodeGen(ctx, ev, (string, format) => { + s""" + try { + ${ev.value} = $tf$$.MODULE$$.apply($format.toString(), $zid, $locale) + .parse($string.toString()) / $downScaleFactor; + } catch (java.lang.IllegalArgumentException e) { + ${ev.isNull} = true; + } catch (java.text.ParseException e) { + ${ev.isNull} = true; + } catch (java.time.format.DateTimeParseException e) { + ${ev.isNull} = true; + } catch (java.time.DateTimeException e) { + ${ev.isNull} = true; + } + """ + }) + case TimestampType => + val eval1 = left.genCode(ctx) + ev.copy(code = code""" + ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; + $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + if (!${ev.isNull}) { + ${ev.value} = ${eval1.value} / $downScaleFactor; + }""") + case DateType => + val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val eval1 = left.genCode(ctx) + ev.copy(code = code""" + ${eval1.code} + boolean ${ev.isNull} = ${eval1.isNull}; + $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + if (!${ev.isNull}) { + ${ev.value} = $dtu.epochDaysToMicros(${eval1.value}, $zid) / $downScaleFactor; + }""") + } + } +} + +abstract class UnixTime extends ToTimestamp { + override val downScaleFactor: Long = MICROS_PER_SECOND +} + +/** + * Gets timestamps from strings using given pattern. 
+ */ +private case class GetTimestamp( + left: Expression, + right: Expression, + timeZoneId: Option[String] = None) + extends ToTimestamp { + + override val downScaleFactor = 1 + override def dataType: DataType = TimestampType + + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) +} + +/** + * Converts time string with given pattern. + * Deterministic version of [[UnixTimestamp]], must have at least one parameter. + */ +@ExpressionDescription( + usage = "_FUNC_(expr[, pattern]) - Returns the UNIX timestamp of the given time.", + examples = """ + Examples: + > SELECT _FUNC_('2016-04-08', 'yyyy-MM-dd'); + 1460098800 + """, + since = "1.6.0") +case class ToUnixTimestamp( + timeExp: Expression, format: Expression, - timestamp: Expression, timeZoneId: Option[String] = None) - extends TruncInstant with TimeZoneAwareExpression { - override def left: Expression = format - override def right: Expression = timestamp + extends UnixTime { + + def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) + + override def left: Expression = timeExp + override def right: Expression = format - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType) - override def dataType: TimestampType = TimestampType - override def prettyName: String = "date_trunc" - override val instant = timestamp override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - def this(format: Expression, timestamp: Expression) = this(format, timestamp, None) - - override def eval(input: InternalRow): Any = { - evalHelper(input, minLevel = MIN_LEVEL_OF_TIMESTAMP_TRUNC) { (t: Any, level: Int) => - DateTimeUtils.truncTimestamp(t.asInstanceOf[Long], level, timeZone) - } + def this(time: Expression) = { + this(time, Literal("uuuu-MM-dd HH:mm:ss")) } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val tz = 
ctx.addReferenceObj("timeZone", timeZone) - codeGenHelper(ctx, ev, minLevel = MIN_LEVEL_OF_TIMESTAMP_TRUNC, true) { - (date: String, fmt: String) => - s"truncTimestamp($date, $fmt, $tz);" - } - } + override def prettyName: String = "to_unix_timestamp" } /** - * Returns the number of days from startDate to endDate. + * Converts time string with given pattern to Unix time stamp (in seconds), returns null if fail. + * See [https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html]. + * Note that hive Language Manual says it returns 0 if fail, but in fact it returns null. + * If the second parameter is missing, use "uuuu-MM-dd HH:mm:ss". + * If no parameters provided, the first parameter will be current_timestamp. + * If the first parameter is a Date or Timestamp instead of String, we will ignore the + * second parameter. */ @ExpressionDescription( - usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.", + usage = "_FUNC_([expr[, pattern]]) - Returns the UNIX timestamp of current or specified time.", examples = """ Examples: - > SELECT _FUNC_('2009-07-31', '2009-07-30'); - 1 - - > SELECT _FUNC_('2009-07-30', '2009-07-31'); - -1 + > SELECT _FUNC_(); + 1476884637 + > SELECT _FUNC_('2016-04-08', 'yyyy-MM-dd'); + 1460041200 """, since = "1.5.0") -case class DateDiff(endDate: Expression, startDate: Expression) - extends BinaryExpression with ImplicitCastInputTypes { +case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Option[String] = None) + extends UnixTime { - override def left: Expression = endDate - override def right: Expression = startDate - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType) - override def dataType: DataType = IntegerType + def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) - override def nullSafeEval(end: Any, start: Any): Any = { - end.asInstanceOf[Int] - start.asInstanceOf[Int] + override def left: 
Expression = timeExp + override def right: Expression = format + + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + def this(time: Expression) = { + this(time, Literal("uuuu-MM-dd HH:mm:ss")) } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (end, start) => s"$end - $start") + def this() = { + this(CurrentTimestamp()) } + + override def prettyName: String = "unix_timestamp" } /** - * Gets timestamps from strings using given pattern. + * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string + * representing the timestamp of that moment in the current system time zone in the given + * format. If the format is missing, using format like "1970-01-01 00:00:00". + * Note that hive Language Manual says it returns 0 if fail, but in fact it returns null. */ -private case class GetTimestamp( - left: Expression, - right: Expression, - timeZoneId: Option[String] = None) - extends ToTimestamp { - - override val downScaleFactor = 1 - override def dataType: DataType = TimestampType - - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = - copy(timeZoneId = Option(timeZoneId)) -} - @ExpressionDescription( - usage = "_FUNC_(year, month, day) - Create date from year, month and day fields.", - arguments = """ - Arguments: - * year - the year to represent, from 1 to 9999 - * month - the month-of-year to represent, from 1 (January) to 12 (December) - * day - the day-of-month to represent, from 1 to 31 - """, + usage = "_FUNC_(unix_time, format) - Returns `unix_time` in the specified `format`.", examples = """ Examples: - > SELECT _FUNC_(2013, 7, 15); - 2013-07-15 - > SELECT _FUNC_(2019, 13, 1); - NULL - > SELECT _FUNC_(2019, 7, NULL); - NULL - > SELECT _FUNC_(2019, 2, 30); - NULL + > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss'); + 1969-12-31 16:00:00 """, - since = "3.0.0") -case class MakeDate(year: Expression, month: 
Expression, day: Expression) - extends TernaryExpression with ImplicitCastInputTypes { + since = "1.5.0") +case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[String] = None) + extends BinaryExpression with TimeZoneAwareExpression with ImplicitCastInputTypes { + + def this(sec: Expression, format: Expression) = this(sec, format, None) + + override def left: Expression = sec + override def right: Expression = format + + override def prettyName: String = "from_unixtime" + + def this(unix: Expression) = { + this(unix, Literal("uuuu-MM-dd HH:mm:ss")) + } - override def children: Seq[Expression] = Seq(year, month, day) - override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType) - override def dataType: DataType = DateType + override def dataType: DataType = StringType override def nullable: Boolean = true - override def nullSafeEval(year: Any, month: Any, day: Any): Any = { + override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType) + + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] + private lazy val formatter: TimestampFormatter = try { - val ld = LocalDate.of(year.asInstanceOf[Int], month.asInstanceOf[Int], day.asInstanceOf[Int]) - localDateToDays(ld) + TimestampFormatter(constFormat.toString, zoneId) } catch { - case _: java.time.DateTimeException => null + case NonFatal(_) => null + } + + override def eval(input: InternalRow): Any = { + val time = left.eval(input) + if (time == null) { + null + } else { + if (format.foldable) { + if (constFormat == null || formatter == null) { + null + } else { + try { + UTF8String.fromString(formatter.format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) + } catch { + case NonFatal(_) => null + } + } + } else { + val f = format.eval(input) + if (f == null) { + null + } else { + try { + 
UTF8String.fromString(TimestampFormatter(f.toString, zoneId) + .format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) + } catch { + case NonFatal(_) => null + } + } + } } } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - nullSafeCodeGen(ctx, ev, (year, month, day) => { - s""" - try { - ${ev.value} = $dtu.localDateToDays(java.time.LocalDate.of($year, $month, $day)); - } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; - }""" - }) + val df = classOf[TimestampFormatter].getName + if (format.foldable) { + if (formatter == null) { + ExprCode.forNullValue(StringType) + } else { + val formatterName = ctx.addReferenceObj("formatter", formatter, df) + val t = left.genCode(ctx) + ev.copy(code = code""" + ${t.code} + boolean ${ev.isNull} = ${t.isNull}; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + if (!${ev.isNull}) { + try { + ${ev.value} = UTF8String.fromString($formatterName.format(${t.value} * 1000000L)); + } catch (java.lang.IllegalArgumentException e) { + ${ev.isNull} = true; + } + }""") + } + } else { + val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + val locale = ctx.addReferenceObj("locale", Locale.US) + val tf = TimestampFormatter.getClass.getName.stripSuffix("$") + nullSafeCodeGen(ctx, ev, (seconds, f) => { + s""" + try { + ${ev.value} = UTF8String.fromString($tf$$.MODULE$$.apply($f.toString(), $zid, $locale). 
+ format($seconds * 1000000L)); + } catch (java.lang.IllegalArgumentException e) { + ${ev.isNull} = true; + }""" + }) + } } - - override def prettyName: String = "make_date" } // scalastyle:off line.size.limit @@ -1929,54 +1196,6 @@ case class MakeTimestamp( override def prettyName: String = "make_timestamp" } -case class Millennium(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getMillennium(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getMillennium($c)") - } -} - -case class Century(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getCentury(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getCentury($c)") - } -} - -case class Decade(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - - override def dataType: DataType = IntegerType - - override protected def nullSafeEval(date: Any): Any = { - DateTimeUtils.getDecade(date.asInstanceOf[Int]) - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - defineCodeGen(ctx, ev, c => s"$dtu.getDecade($c)") - } -} - case class Epoch(child: Expression, timeZoneId: Option[String] = None) extends 
UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression { @@ -2000,100 +1219,6 @@ case class Epoch(child: Expression, timeZoneId: Option[String] = None) } } -object DatePart { - - def parseExtractField( - extractField: String, - source: Expression, - errorHandleFunc: => Nothing): Expression = extractField.toUpperCase(Locale.ROOT) match { - case "MILLENNIUM" | "MILLENNIA" | "MIL" | "MILS" => Millennium(source) - case "CENTURY" | "CENTURIES" | "C" | "CENT" => Century(source) - case "DECADE" | "DECADES" | "DEC" | "DECS" => Decade(source) - case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => Year(source) - case "ISOYEAR" => IsoYear(source) - case "QUARTER" | "QTR" => Quarter(source) - case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source) - case "WEEK" | "W" | "WEEKS" => WeekOfYear(source) - case "DAY" | "D" | "DAYS" => DayOfMonth(source) - case "DAYOFWEEK" => DayOfWeek(source) - case "DOW" => Subtract(DayOfWeek(source), Literal(1)) - case "ISODOW" => Add(WeekDay(source), Literal(1)) - case "DOY" => DayOfYear(source) - case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => Hour(source) - case "MINUTE" | "M" | "MIN" | "MINS" | "MINUTES" => Minute(source) - case "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => SecondWithFraction(source) - case "MILLISECONDS" | "MSEC" | "MSECS" | "MILLISECON" | "MSECONDS" | "MS" => - Milliseconds(source) - case "MICROSECONDS" | "USEC" | "USECS" | "USECONDS" | "MICROSECON" | "US" => - Microseconds(source) - case "EPOCH" => Epoch(source) - case _ => errorHandleFunc - } -} - -@ExpressionDescription( - usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp.", - arguments = """ - Arguments: - * field - selects which part of the source should be extracted. 
Supported string values are: - ["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"), - "CENTURY", ("CENTURIES", "C", "CENT"), - "DECADE", ("DECADES", "DEC", "DECS"), - "YEAR", ("Y", "YEARS", "YR", "YRS"), - "ISOYEAR", - "QUARTER", ("QTR"), - "MONTH", ("MON", "MONS", "MONTHS"), - "WEEK", ("W", "WEEKS"), - "DAY", ("D", "DAYS"), - "DAYOFWEEK", - "DOW", - "ISODOW", - "DOY", - "HOUR", ("H", "HOURS", "HR", "HRS"), - "MINUTE", ("M", "MIN", "MINS", "MINUTES"), - "SECOND", ("S", "SEC", "SECONDS", "SECS"), - "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"), - "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"), - "EPOCH"] - * source - a date (or timestamp) column from where `field` should be extracted - """, - examples = """ - Examples: - > SELECT _FUNC_('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456'); - 2019 - > SELECT _FUNC_('week', timestamp'2019-08-12 01:00:00.123456'); - 33 - > SELECT _FUNC_('doy', DATE'2019-08-12'); - 224 - > SELECT _FUNC_('SECONDS', timestamp'2019-10-01 00:00:01.000001'); - 1.000001 - """, - since = "3.0.0") -case class DatePart(field: Expression, source: Expression, child: Expression) - extends RuntimeReplaceable { - - def this(field: Expression, source: Expression) { - this(field, source, { - if (!field.foldable) { - throw new AnalysisException("The field parameter needs to be a foldable string value.") - } - val fieldEval = field.eval() - if (fieldEval == null) { - Literal(null, DoubleType) - } else { - val fieldStr = fieldEval.asInstanceOf[UTF8String].toString - DatePart.parseExtractField(fieldStr, source, { - throw new AnalysisException(s"Literals of type '$fieldStr' are currently not supported.") - }) - } - }) - } - - override def flatArguments: Iterator[Any] = Iterator(field, source) - override def sql: String = s"$prettyName(${field.sql}, ${source.sql})" - override def prettyName: String = "date_part" -} - /** * Returns the interval from startTimestamp to endTimestamp in which the `months` field * is set to 0 and the 
`microseconds` field is initialized to the microsecond difference diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 8680a15ee1cd7..a5b5b1e18757d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,21 +19,14 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset} +import java.time.ZoneOffset import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit -import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter} -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -66,12 +59,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { assert(cdpst <= cd && cd <= cdjst) } - test("datetime function current_timestamp") { - val ct = DateTimeUtils.toJavaTimestamp(CurrentTimestamp().eval(EmptyRow).asInstanceOf[Long]) - val t1 = System.currentTimeMillis() - assert(math.abs(t1 - ct.getTime) < 5000) - } - test("DayOfYear") { val sdfDay = new SimpleDateFormat("D", Locale.US) @@ -183,28 +170,6 @@ class 
DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(DayOfMonth, DateType) } - test("Seconds") { - assert(Second(Literal.create(null, DateType), gmtId).resolved === false) - assert(Second(Cast(Literal(d), TimestampType, gmtId), gmtId).resolved ) - checkEvaluation(Second(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation(Second(Cast(Literal(date), TimestampType, gmtId), gmtId), 15) - checkEvaluation(Second(Literal(ts), gmtId), 15) - - val c = Calendar.getInstance() - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - c.setTimeZone(tz) - (0 to 60 by 5).foreach { s => - c.set(2015, 18, 3, 3, 5, s) - checkEvaluation( - Second(Literal(new Timestamp(c.getTimeInMillis)), timeZoneId), - c.get(Calendar.SECOND)) - } - checkConsistencyBetweenInterpretedAndCodegen( - (child: Expression) => Second(child, timeZoneId), TimestampType) - } - } - test("DayOfWeek") { checkEvaluation(DayOfWeek(Literal.create(null, DateType)), null) checkEvaluation(DayOfWeek(Literal(d)), Calendar.WEDNESDAY) @@ -241,95 +206,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(WeekOfYear, DateType) } - test("DateFormat") { - checkEvaluation( - DateFormatClass(Literal.create(null, TimestampType), Literal("y"), gmtId), - null) - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), - Literal.create(null, StringType), gmtId), null) - - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), - Literal("y"), gmtId), "2015") - checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), gmtId), "2013") - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), - Literal("H"), gmtId), "0") - checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), gmtId), "13") - - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, pstId), - Literal("y"), pstId), "2015") 
- checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), pstId), "2013") - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, pstId), - Literal("H"), pstId), "0") - checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), pstId), "5") - - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, jstId), - Literal("y"), jstId), "2015") - checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), jstId), "2013") - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, jstId), - Literal("H"), jstId), "0") - checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), jstId), "22") - - // SPARK-28072 The codegen path should work - checkEvaluation( - expression = DateFormatClass( - BoundReference(ordinal = 0, dataType = TimestampType, nullable = true), - BoundReference(ordinal = 1, dataType = StringType, nullable = true), - jstId), - expected = "22", - inputRow = InternalRow(DateTimeUtils.fromJavaTimestamp(ts), UTF8String.fromString("H"))) - } - - test("Hour") { - assert(Hour(Literal.create(null, DateType), gmtId).resolved === false) - assert(Hour(Literal(ts), gmtId).resolved) - checkEvaluation(Hour(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation(Hour(Cast(Literal(date), TimestampType, gmtId), gmtId), 13) - checkEvaluation(Hour(Literal(ts), gmtId), 13) - - val c = Calendar.getInstance() - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - c.setTimeZone(tz) - (0 to 24 by 6).foreach { h => - (0 to 60 by 30).foreach { m => - (0 to 60 by 30).foreach { s => - c.set(2015, 18, 3, h, m, s) - checkEvaluation( - Hour(Literal(new Timestamp(c.getTimeInMillis)), timeZoneId), - c.get(Calendar.HOUR_OF_DAY)) - } - } - } - checkConsistencyBetweenInterpretedAndCodegen( - (child: Expression) => Hour(child, timeZoneId), TimestampType) - } - } - - test("Minute") { - assert(Minute(Literal.create(null, DateType), gmtId).resolved === false) - assert(Minute(Literal(ts), gmtId).resolved) - 
checkEvaluation(Minute(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation( - Minute(Cast(Literal(date), TimestampType, gmtId), gmtId), 10) - checkEvaluation(Minute(Literal(ts), gmtId), 10) - - val c = Calendar.getInstance() - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - c.setTimeZone(tz) - (0 to 60 by 5).foreach { m => - (0 to 60 by 15).foreach { s => - c.set(2015, 18, 3, 3, m, s) - checkEvaluation( - Minute(Literal(new Timestamp(c.getTimeInMillis)), timeZoneId), - c.get(Calendar.MINUTE)) - } - } - checkConsistencyBetweenInterpretedAndCodegen( - (child: Expression) => Minute(child, timeZoneId), TimestampType) - } - } - test("date_add") { checkEvaluation( DateAdd(Literal(Date.valueOf("2016-02-28")), Literal(1)), @@ -368,89 +244,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(DateSub, DateType, IntegerType) } - test("time_add") { - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - sdf.setTimeZone(tz) - - checkEvaluation( - TimeAdd( - Literal(new Timestamp(sdf.parse("2016-01-29 10:00:00.000").getTime)), - Literal(new CalendarInterval(1, 123000L)), - timeZoneId), - DateTimeUtils.fromJavaTimestamp( - new Timestamp(sdf.parse("2016-02-29 10:00:00.123").getTime))) - - checkEvaluation( - TimeAdd( - Literal.create(null, TimestampType), - Literal(new CalendarInterval(1, 123000L)), - timeZoneId), - null) - checkEvaluation( - TimeAdd( - Literal(new Timestamp(sdf.parse("2016-01-29 10:00:00.000").getTime)), - Literal.create(null, CalendarIntervalType), - timeZoneId), - null) - checkEvaluation( - TimeAdd( - Literal.create(null, TimestampType), - Literal.create(null, CalendarIntervalType), - timeZoneId), - null) - checkConsistencyBetweenInterpretedAndCodegen( - (start: Expression, interval: Expression) => TimeAdd(start, 
interval, timeZoneId), - TimestampType, CalendarIntervalType) - } - } - - test("time_sub") { - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - sdf.setTimeZone(tz) - - checkEvaluation( - TimeSub( - Literal(new Timestamp(sdf.parse("2016-03-31 10:00:00.000").getTime)), - Literal(new CalendarInterval(1, 0)), - timeZoneId), - DateTimeUtils.fromJavaTimestamp( - new Timestamp(sdf.parse("2016-02-29 10:00:00.000").getTime))) - checkEvaluation( - TimeSub( - Literal(new Timestamp(sdf.parse("2016-03-30 00:00:01.000").getTime)), - Literal(new CalendarInterval(1, 2000000.toLong)), - timeZoneId), - DateTimeUtils.fromJavaTimestamp( - new Timestamp(sdf.parse("2016-02-28 23:59:59.000").getTime))) - - checkEvaluation( - TimeSub( - Literal.create(null, TimestampType), - Literal(new CalendarInterval(1, 123000L)), - timeZoneId), - null) - checkEvaluation( - TimeSub( - Literal(new Timestamp(sdf.parse("2016-01-29 10:00:00.000").getTime)), - Literal.create(null, CalendarIntervalType), - timeZoneId), - null) - checkEvaluation( - TimeSub( - Literal.create(null, TimestampType), - Literal.create(null, CalendarIntervalType), - timeZoneId), - null) - checkConsistencyBetweenInterpretedAndCodegen( - (start: Expression, interval: Expression) => TimeSub(start, interval, timeZoneId), - TimestampType, CalendarIntervalType) - } - } - test("add_months") { checkEvaluation(AddMonths(Literal(Date.valueOf("2015-01-30")), Literal(1)), DateTimeUtils.fromJavaDate(Date.valueOf("2015-02-28"))) @@ -477,60 +270,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } - test("months_between") { - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - sdf.setTimeZone(tz) - - checkEvaluation( - MonthsBetween( - Literal(new Timestamp(sdf.parse("1997-02-28 
10:30:00").getTime)), - Literal(new Timestamp(sdf.parse("1996-10-30 00:00:00").getTime)), - Literal.TrueLiteral, - timeZoneId = timeZoneId), 3.94959677) - checkEvaluation( - MonthsBetween( - Literal(new Timestamp(sdf.parse("1997-02-28 10:30:00").getTime)), - Literal(new Timestamp(sdf.parse("1996-10-30 00:00:00").getTime)), - Literal.FalseLiteral, - timeZoneId = timeZoneId), 3.9495967741935485) - - Seq(Literal.FalseLiteral, Literal.TrueLiteral). foreach { roundOff => - checkEvaluation( - MonthsBetween( - Literal(new Timestamp(sdf.parse("2015-01-30 11:52:00").getTime)), - Literal(new Timestamp(sdf.parse("2015-01-30 11:50:00").getTime)), - roundOff, - timeZoneId = timeZoneId), 0.0) - checkEvaluation( - MonthsBetween( - Literal(new Timestamp(sdf.parse("2015-01-31 00:00:00").getTime)), - Literal(new Timestamp(sdf.parse("2015-03-31 22:00:00").getTime)), - roundOff, - timeZoneId = timeZoneId), -2.0) - checkEvaluation( - MonthsBetween( - Literal(new Timestamp(sdf.parse("2015-03-31 22:00:00").getTime)), - Literal(new Timestamp(sdf.parse("2015-02-28 00:00:00").getTime)), - roundOff, - timeZoneId = timeZoneId), 1.0) - } - val t = Literal(Timestamp.valueOf("2015-03-31 22:00:00")) - val tnull = Literal.create(null, TimestampType) - checkEvaluation(MonthsBetween(t, tnull, Literal.TrueLiteral, timeZoneId = timeZoneId), null) - checkEvaluation(MonthsBetween(tnull, t, Literal.TrueLiteral, timeZoneId = timeZoneId), null) - checkEvaluation( - MonthsBetween(tnull, tnull, Literal.TrueLiteral, timeZoneId = timeZoneId), null) - checkEvaluation( - MonthsBetween(t, t, Literal.create(null, BooleanType), timeZoneId = timeZoneId), null) - checkConsistencyBetweenInterpretedAndCodegen( - (time1: Expression, time2: Expression, roundOff: Expression) => - MonthsBetween(time1, time2, roundOff, timeZoneId = timeZoneId), - TimestampType, TimestampType, BooleanType) - } - } - test("last_day") { checkEvaluation(LastDay(Literal(Date.valueOf("2015-02-28"))), Date.valueOf("2015-02-28")) 
checkEvaluation(LastDay(Literal(Date.valueOf("2015-03-27"))), Date.valueOf("2015-03-31")) @@ -601,241 +340,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testTrunc(Date.valueOf("2000-03-08"), "century", Date.valueOf("1901-01-01")) } - test("TruncTimestamp") { - def testTrunc(input: Timestamp, fmt: String, expected: Timestamp): Unit = { - checkEvaluation( - TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)), - expected) - checkEvaluation( - TruncTimestamp( - NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)), - expected) - } - - withDefaultTimeZone(TimeZoneGMT) { - val inputDate = Timestamp.valueOf("2015-07-22 05:30:06") - - Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-01-01 00:00:00")) - } - - Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-01 00:00:00")) - } - - Seq("DAY", "day", "DD", "dd").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-22 00:00:00")) - } - - Seq("HOUR", "hour").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-22 05:00:00")) - } - - Seq("MINUTE", "minute").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-22 05:30:00")) - } - - Seq("SECOND", "second").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-22 05:30:06")) - } - - Seq("WEEK", "week").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-20 00:00:00")) - } - - Seq("QUARTER", "quarter").foreach { fmt => - testTrunc( - inputDate, fmt, - Timestamp.valueOf("2015-07-01 00:00:00")) - } - - testTrunc(inputDate, "INVALID", null) - testTrunc(inputDate, null, null) - testTrunc(null, "MON", null) - testTrunc(null, null, null) - - testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "decade", - 
Timestamp.valueOf("2000-01-01 00:00:00")) - testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "century", - Timestamp.valueOf("1901-01-01 00:00:00")) - } - } - - test("from_unixtime") { - val fmt1 = "yyyy-MM-dd HH:mm:ss" - val sdf1 = new SimpleDateFormat(fmt1, Locale.US) - val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2, Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - sdf1.setTimeZone(tz) - sdf2.setTimeZone(tz) - - checkEvaluation( - FromUnixTime(Literal(0L), Literal(fmt1), timeZoneId), - sdf1.format(new Timestamp(0))) - checkEvaluation(FromUnixTime( - Literal(1000L), Literal(fmt1), timeZoneId), - sdf1.format(new Timestamp(1000000))) - checkEvaluation( - FromUnixTime(Literal(-1000L), Literal(fmt2), timeZoneId), - sdf2.format(new Timestamp(-1000000))) - checkEvaluation( - FromUnixTime(Literal.create(null, LongType), Literal.create(null, StringType), timeZoneId), - null) - checkEvaluation( - FromUnixTime(Literal.create(null, LongType), Literal(fmt1), timeZoneId), - null) - checkEvaluation( - FromUnixTime(Literal(1000L), Literal.create(null, StringType), timeZoneId), - null) - checkEvaluation( - FromUnixTime(Literal(0L), Literal("not a valid format"), timeZoneId), null) - - // SPARK-28072 The codegen path for non-literal input should also work - checkEvaluation( - expression = FromUnixTime( - BoundReference(ordinal = 0, dataType = LongType, nullable = true), - BoundReference(ordinal = 1, dataType = StringType, nullable = true), - timeZoneId), - expected = UTF8String.fromString(sdf1.format(new Timestamp(0))), - inputRow = InternalRow(0L, UTF8String.fromString(fmt1))) - } - } - - test("unix_timestamp") { - val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2, Locale.US) - val fmt3 = "yy-MM-dd" - val sdf3 = new SimpleDateFormat(fmt3, Locale.US) - sdf3.setTimeZone(TimeZoneGMT) - - 
withDefaultTimeZone(TimeZoneGMT) { - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - sdf1.setTimeZone(tz) - sdf2.setTimeZone(tz) - - val date1 = Date.valueOf("2015-07-24") - checkEvaluation(UnixTimestamp( - Literal(sdf1.format(new Timestamp(0))), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), 0L) - checkEvaluation(UnixTimestamp( - Literal(sdf1.format(new Timestamp(1000000))), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), - 1000L) - checkEvaluation( - UnixTimestamp( - Literal(new Timestamp(1000000)), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), - 1000L) - checkEvaluation( - UnixTimestamp(Literal(date1), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), - MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) - checkEvaluation( - UnixTimestamp(Literal(sdf2.format(new Timestamp(-1000000))), Literal(fmt2), timeZoneId), - -1000L) - checkEvaluation(UnixTimestamp( - Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3), timeZoneId), - MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis( - DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24")), tz))) - val t1 = UnixTimestamp( - CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long] - val t2 = UnixTimestamp( - CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long] - assert(t2 - t1 <= 1) - checkEvaluation( - UnixTimestamp( - Literal.create(null, DateType), Literal.create(null, StringType), timeZoneId), - null) - checkEvaluation( - UnixTimestamp(Literal.create(null, DateType), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), - null) - checkEvaluation( - UnixTimestamp(Literal(date1), Literal.create(null, StringType), timeZoneId), - MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) - checkEvaluation( - UnixTimestamp(Literal("2015-07-24"), Literal("not a valid format"), timeZoneId), null) - } - } - } - - test("to_unix_timestamp") { - val fmt1 = "yyyy-MM-dd HH:mm:ss" 
- val sdf1 = new SimpleDateFormat(fmt1, Locale.US) - val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2, Locale.US) - val fmt3 = "yy-MM-dd" - val sdf3 = new SimpleDateFormat(fmt3, Locale.US) - sdf3.setTimeZone(TimeZoneGMT) - - withDefaultTimeZone(TimeZoneGMT) { - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { - val timeZoneId = Option(tz.getID) - sdf1.setTimeZone(tz) - sdf2.setTimeZone(tz) - - val date1 = Date.valueOf("2015-07-24") - checkEvaluation(ToUnixTimestamp( - Literal(sdf1.format(new Timestamp(0))), Literal(fmt1), timeZoneId), 0L) - checkEvaluation(ToUnixTimestamp( - Literal(sdf1.format(new Timestamp(1000000))), Literal(fmt1), timeZoneId), - 1000L) - checkEvaluation(ToUnixTimestamp( - Literal(new Timestamp(1000000)), Literal(fmt1)), - 1000L) - checkEvaluation( - ToUnixTimestamp(Literal(date1), Literal(fmt1), timeZoneId), - MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) - checkEvaluation( - ToUnixTimestamp(Literal(sdf2.format(new Timestamp(-1000000))), Literal(fmt2), timeZoneId), - -1000L) - checkEvaluation(ToUnixTimestamp( - Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3), timeZoneId), - MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis( - DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24")), tz))) - val t1 = ToUnixTimestamp( - CurrentTimestamp(), Literal(fmt1)).eval().asInstanceOf[Long] - val t2 = ToUnixTimestamp( - CurrentTimestamp(), Literal(fmt1)).eval().asInstanceOf[Long] - assert(t2 - t1 <= 1) - checkEvaluation(ToUnixTimestamp( - Literal.create(null, DateType), Literal.create(null, StringType), timeZoneId), null) - checkEvaluation( - ToUnixTimestamp( - Literal.create(null, DateType), Literal(fmt1), timeZoneId), - null) - checkEvaluation(ToUnixTimestamp( - Literal(date1), Literal.create(null, StringType), timeZoneId), - MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) - checkEvaluation( - 
ToUnixTimestamp(Literal("2015-07-24"), Literal("not a valid format"), timeZoneId), null) - - // SPARK-28072 The codegen path for non-literal input should also work - checkEvaluation( - expression = ToUnixTimestamp( - BoundReference(ordinal = 0, dataType = StringType, nullable = true), - BoundReference(ordinal = 1, dataType = StringType, nullable = true), - timeZoneId), - expected = 0L, - inputRow = InternalRow( - UTF8String.fromString(sdf1.format(new Timestamp(0))), UTF8String.fromString(fmt1))) - } - } - } - test("datediff") { checkEvaluation( DateDiff(Literal(Date.valueOf("2015-07-24")), Literal(Date.valueOf("2015-07-21"))), 3) @@ -850,83 +354,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { null) } - test("to_utc_timestamp") { - def test(t: String, tz: String, expected: String): Unit = { - checkEvaluation( - ToUTCTimestamp( - Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), - Literal.create(tz, StringType)), - if (expected != null) Timestamp.valueOf(expected) else null) - checkEvaluation( - ToUTCTimestamp( - Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), - NonFoldableLiteral.create(tz, StringType)), - if (expected != null) Timestamp.valueOf(expected) else null) - } - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00") - test("2015-01-24 00:00:00", "PST", "2015-01-24 08:00:00") - test(null, "UTC", null) - test("2015-07-24 00:00:00", null, null) - test(null, null, null) - } - val msg = intercept[AnalysisException] { - test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00") - }.getMessage - assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) - } - - test("to_utc_timestamp - invalid time zone id") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz => - val msg = intercept[java.time.DateTimeException] { - 
GenerateUnsafeProjection.generate( - ToUTCTimestamp( - Literal(Timestamp.valueOf("2015-07-24 00:00:00")), Literal(invalidTz)) :: Nil) - }.getMessage - assert(msg.contains(invalidTz)) - } - } - } - - test("from_utc_timestamp") { - def test(t: String, tz: String, expected: String): Unit = { - checkEvaluation( - FromUTCTimestamp( - Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), - Literal.create(tz, StringType)), - if (expected != null) Timestamp.valueOf(expected) else null) - checkEvaluation( - FromUTCTimestamp( - Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), - NonFoldableLiteral.create(tz, StringType)), - if (expected != null) Timestamp.valueOf(expected) else null) - } - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00") - test("2015-01-24 00:00:00", "PST", "2015-01-23 16:00:00") - test(null, "UTC", null) - test("2015-07-24 00:00:00", null, null) - test(null, null, null) - } - val msg = intercept[AnalysisException] { - test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00") - }.getMessage - assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) - } - - test("from_utc_timestamp - invalid time zone id") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz => - val msg = intercept[java.time.DateTimeException] { - GenerateUnsafeProjection.generate( - FromUTCTimestamp(Literal(0), Literal(invalidTz)) :: Nil) - }.getMessage - assert(msg.contains(invalidTz)) - } - } - } - test("creating values of DateType via make_date") { checkEvaluation(MakeDate(Literal(2013), Literal(7), Literal(15)), Date.valueOf("2013-7-15")) checkEvaluation(MakeDate(Literal.create(null, IntegerType), Literal(7), Literal(15)), null) @@ -937,42 +364,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(MakeDate(Literal(2019), Literal(7), 
Literal(32)), null) } - test("creating values of TimestampType via make_timestamp") { - var makeTimestampExpr = MakeTimestamp( - Literal(2013), Literal(7), Literal(15), Literal(8), Literal(15), - Literal(Decimal(BigDecimal(23.5), 8, 6)), Some(Literal(ZoneId.systemDefault().getId))) - val expected = Timestamp.valueOf("2013-7-15 8:15:23.5") - checkEvaluation(makeTimestampExpr, expected) - checkEvaluation(makeTimestampExpr.copy(timezone = None), expected) - - checkEvaluation(makeTimestampExpr.copy(year = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(year = Literal(Int.MaxValue)), null) - - checkEvaluation(makeTimestampExpr.copy(month = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(month = Literal(13)), null) - - checkEvaluation(makeTimestampExpr.copy(day = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(day = Literal(32)), null) - - checkEvaluation(makeTimestampExpr.copy(hour = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(hour = Literal(25)), null) - - checkEvaluation(makeTimestampExpr.copy(min = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(min = Literal(65)), null) - - checkEvaluation(makeTimestampExpr.copy(sec = Literal.create(null, DecimalType(8, 6))), null) - checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(70.0), 8, 6))), null) - - makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), - Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) - checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 00:00:00")) - checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(60.5), 8, 6))), null) - - makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), - Literal(0), Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) - checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 
00:00:58.000001")) - } - test("millennium") { val date = MakeDate(Literal(2019), Literal(1), Literal(1)) checkEvaluation(Millennium(date), 3) @@ -1011,80 +402,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Decade(date.copy(year = Literal(-2019))), -202) } - test("milliseconds and microseconds") { - outstandingTimezonesIds.foreach { timezone => - var timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10), - Literal(0), Literal(0), Literal(Decimal(BigDecimal(10.123456789), 8, 6)), - Some(Literal(timezone))) - - checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(10123.457), 8, 3)) - checkEvaluation(Microseconds(timestamp), 10123457) - - timestamp = timestamp.copy(sec = Literal(Decimal(0.0, 8, 6))) - checkEvaluation(Milliseconds(timestamp), Decimal(0, 8, 3)) - checkEvaluation(Microseconds(timestamp), 0) - - timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(59.999999), 8, 6))) - checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(59999.999), 8, 3)) - checkEvaluation(Microseconds(timestamp), 59999999) - - timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(60.0), 8, 6))) - checkEvaluation(Milliseconds(timestamp), Decimal(0, 8, 3)) - checkEvaluation(Microseconds(timestamp), 0) - } - } - - test("epoch") { - val zoneId = ZoneId.systemDefault() - val nanos = 123456000 - val timestamp = Epoch(MakeTimestamp( - Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0), - Literal(Decimal(nanos / DateTimeUtils.NANOS_PER_SECOND.toDouble, 8, 6)), - Some(Literal(zoneId.getId)))) - val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, nanos) - .atZone(zoneId).toInstant - val expected = Decimal(BigDecimal(nanos) / DateTimeUtils.NANOS_PER_SECOND + - instant.getEpochSecond + - zoneId.getRules.getOffset(instant).getTotalSeconds) - checkEvaluation(timestamp, expected) - } - test("ISO 8601 week-numbering year") { checkEvaluation(IsoYear(MakeDate(Literal(2006), Literal(1), Literal(1))), 2005) 
checkEvaluation(IsoYear(MakeDate(Literal(2006), Literal(1), Literal(2))), 2006) } - - test("extract the seconds part with fraction from timestamps") { - outstandingTimezonesIds.foreach { timezone => - val timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10), - Literal(0), Literal(0), Literal(Decimal(10.123456, 8, 6)), - Some(Literal(timezone))) - - checkEvaluation(SecondWithFraction(timestamp), Decimal(10.123456, 8, 6)) - checkEvaluation( - SecondWithFraction(timestamp.copy(sec = Literal(Decimal(59000001, 8, 6)))), - Decimal(59000001, 8, 6)) - checkEvaluation( - SecondWithFraction(timestamp.copy(sec = Literal(Decimal(1, 8, 6)))), - Decimal(0.000001, 8, 6)) - } - } - - test("timestamps difference") { - val end = Instant.parse("2019-10-04T11:04:01.123456Z") - checkEvaluation(TimestampDiff(Literal(end), Literal(end)), - new CalendarInterval(0, 0)) - checkEvaluation(TimestampDiff(Literal(end), Literal(Instant.EPOCH)), - CalendarInterval.fromString("interval 18173 days " + - "11 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds")) - checkEvaluation(TimestampDiff(Literal(Instant.EPOCH), Literal(end)), - CalendarInterval.fromString("interval -18173 days " + - "-11 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds")) - checkEvaluation( - TimestampDiff( - Literal(Instant.parse("9999-12-31T23:59:59.999999Z")), - Literal(Instant.parse("0001-01-01T00:00:00Z"))), - CalendarInterval.fromString("interval 521722 weeks 4 days " + - "23 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds")) - } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampExpressionsSuite.scala new file mode 100644 index 0000000000000..df49174b875b2 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampExpressionsSuite.scala @@ -0,0 +1,729 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.sql.{Date, Timestamp} +import java.text.SimpleDateFormat +import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset} +import java.util.{Calendar, Locale, TimeZone} +import java.util.concurrent.TimeUnit +import java.util.concurrent.TimeUnit._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} + +class TimestampExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + + val TimeZonePST = TimeZone.getTimeZone("PST") + val TimeZoneJST = TimeZone.getTimeZone("JST") + + val gmtId = Option(TimeZoneGMT.getID) + val pstId = Option(TimeZonePST.getID) + val jstId = Option(TimeZoneJST.getID) + + def 
toMillis(timestamp: String): Long = { + val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", ZoneOffset.UTC) + TimeUnit.MICROSECONDS.toMillis(tf.parse(timestamp)) + } + val date = "2015-04-08 13:10:15" + val d = new Date(toMillis(date)) + val time = "2013-11-08 13:10:15" + val ts = new Timestamp(toMillis(time)) + + test("datetime function current_timestamp") { + val ct = DateTimeUtils.toJavaTimestamp(CurrentTimestamp().eval(EmptyRow).asInstanceOf[Long]) + val t1 = System.currentTimeMillis() + assert(math.abs(t1 - ct.getTime) < 5000) + } + + test("Seconds") { + assert(Second(Literal.create(null, DateType), gmtId).resolved === false) + assert(Second(Cast(Literal(d), TimestampType, gmtId), gmtId).resolved ) + checkEvaluation(Second(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) + checkEvaluation(Second(Cast(Literal(date), TimestampType, gmtId), gmtId), 15) + checkEvaluation(Second(Literal(ts), gmtId), 15) + + val c = Calendar.getInstance() + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + c.setTimeZone(tz) + (0 to 60 by 5).foreach { s => + c.set(2015, 18, 3, 3, 5, s) + checkEvaluation( + Second(Literal(new Timestamp(c.getTimeInMillis)), timeZoneId), + c.get(Calendar.SECOND)) + } + checkConsistencyBetweenInterpretedAndCodegen( + (child: Expression) => Second(child, timeZoneId), TimestampType) + } + } + + test("DateFormat") { + checkEvaluation( + DateFormatClass(Literal.create(null, TimestampType), Literal("y"), gmtId), + null) + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), + Literal.create(null, StringType), gmtId), null) + + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), + Literal("y"), gmtId), "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), gmtId), "2013") + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), + Literal("H"), gmtId), "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), gmtId), "13") + + 
checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, pstId), + Literal("y"), pstId), "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), pstId), "2013") + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, pstId), + Literal("H"), pstId), "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), pstId), "5") + + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, jstId), + Literal("y"), jstId), "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), jstId), "2013") + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, jstId), + Literal("H"), jstId), "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), jstId), "22") + + // SPARK-28072 The codegen path should work + checkEvaluation( + expression = DateFormatClass( + BoundReference(ordinal = 0, dataType = TimestampType, nullable = true), + BoundReference(ordinal = 1, dataType = StringType, nullable = true), + jstId), + expected = "22", + inputRow = InternalRow(DateTimeUtils.fromJavaTimestamp(ts), UTF8String.fromString("H"))) + } + + test("Hour") { + assert(Hour(Literal.create(null, DateType), gmtId).resolved === false) + assert(Hour(Literal(ts), gmtId).resolved) + checkEvaluation(Hour(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) + checkEvaluation(Hour(Cast(Literal(date), TimestampType, gmtId), gmtId), 13) + checkEvaluation(Hour(Literal(ts), gmtId), 13) + + val c = Calendar.getInstance() + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + c.setTimeZone(tz) + (0 to 24 by 6).foreach { h => + (0 to 60 by 30).foreach { m => + (0 to 60 by 30).foreach { s => + c.set(2015, 18, 3, h, m, s) + checkEvaluation( + Hour(Literal(new Timestamp(c.getTimeInMillis)), timeZoneId), + c.get(Calendar.HOUR_OF_DAY)) + } + } + } + checkConsistencyBetweenInterpretedAndCodegen( + (child: Expression) => Hour(child, timeZoneId), TimestampType) + } + } + + test("Minute") { + 
assert(Minute(Literal.create(null, DateType), gmtId).resolved === false) + assert(Minute(Literal(ts), gmtId).resolved) + checkEvaluation(Minute(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) + checkEvaluation( + Minute(Cast(Literal(date), TimestampType, gmtId), gmtId), 10) + checkEvaluation(Minute(Literal(ts), gmtId), 10) + + val c = Calendar.getInstance() + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + c.setTimeZone(tz) + (0 to 60 by 5).foreach { m => + (0 to 60 by 15).foreach { s => + c.set(2015, 18, 3, 3, m, s) + checkEvaluation( + Minute(Literal(new Timestamp(c.getTimeInMillis)), timeZoneId), + c.get(Calendar.MINUTE)) + } + } + checkConsistencyBetweenInterpretedAndCodegen( + (child: Expression) => Minute(child, timeZoneId), TimestampType) + } + } + + test("time_add") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + sdf.setTimeZone(tz) + + checkEvaluation( + TimeAdd( + Literal(new Timestamp(sdf.parse("2016-01-29 10:00:00.000").getTime)), + Literal(new CalendarInterval(1, 123000L)), + timeZoneId), + DateTimeUtils.fromJavaTimestamp( + new Timestamp(sdf.parse("2016-02-29 10:00:00.123").getTime))) + + checkEvaluation( + TimeAdd( + Literal.create(null, TimestampType), + Literal(new CalendarInterval(1, 123000L)), + timeZoneId), + null) + checkEvaluation( + TimeAdd( + Literal(new Timestamp(sdf.parse("2016-01-29 10:00:00.000").getTime)), + Literal.create(null, CalendarIntervalType), + timeZoneId), + null) + checkEvaluation( + TimeAdd( + Literal.create(null, TimestampType), + Literal.create(null, CalendarIntervalType), + timeZoneId), + null) + checkConsistencyBetweenInterpretedAndCodegen( + (start: Expression, interval: Expression) => TimeAdd(start, interval, timeZoneId), + TimestampType, CalendarIntervalType) + } + } + + test("time_sub") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", 
Locale.US) + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + sdf.setTimeZone(tz) + + checkEvaluation( + TimeSub( + Literal(new Timestamp(sdf.parse("2016-03-31 10:00:00.000").getTime)), + Literal(new CalendarInterval(1, 0)), + timeZoneId), + DateTimeUtils.fromJavaTimestamp( + new Timestamp(sdf.parse("2016-02-29 10:00:00.000").getTime))) + checkEvaluation( + TimeSub( + Literal(new Timestamp(sdf.parse("2016-03-30 00:00:01.000").getTime)), + Literal(new CalendarInterval(1, 2000000.toLong)), + timeZoneId), + DateTimeUtils.fromJavaTimestamp( + new Timestamp(sdf.parse("2016-02-28 23:59:59.000").getTime))) + + checkEvaluation( + TimeSub( + Literal.create(null, TimestampType), + Literal(new CalendarInterval(1, 123000L)), + timeZoneId), + null) + checkEvaluation( + TimeSub( + Literal(new Timestamp(sdf.parse("2016-01-29 10:00:00.000").getTime)), + Literal.create(null, CalendarIntervalType), + timeZoneId), + null) + checkEvaluation( + TimeSub( + Literal.create(null, TimestampType), + Literal.create(null, CalendarIntervalType), + timeZoneId), + null) + checkConsistencyBetweenInterpretedAndCodegen( + (start: Expression, interval: Expression) => TimeSub(start, interval, timeZoneId), + TimestampType, CalendarIntervalType) + } + } + + test("months_between") { + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + sdf.setTimeZone(tz) + + checkEvaluation( + MonthsBetween( + Literal(new Timestamp(sdf.parse("1997-02-28 10:30:00").getTime)), + Literal(new Timestamp(sdf.parse("1996-10-30 00:00:00").getTime)), + Literal.TrueLiteral, + timeZoneId = timeZoneId), 3.94959677) + checkEvaluation( + MonthsBetween( + Literal(new Timestamp(sdf.parse("1997-02-28 10:30:00").getTime)), + Literal(new Timestamp(sdf.parse("1996-10-30 00:00:00").getTime)), + Literal.FalseLiteral, + timeZoneId = timeZoneId), 3.9495967741935485) + + 
Seq(Literal.FalseLiteral, Literal.TrueLiteral). foreach { roundOff => + checkEvaluation( + MonthsBetween( + Literal(new Timestamp(sdf.parse("2015-01-30 11:52:00").getTime)), + Literal(new Timestamp(sdf.parse("2015-01-30 11:50:00").getTime)), + roundOff, + timeZoneId = timeZoneId), 0.0) + checkEvaluation( + MonthsBetween( + Literal(new Timestamp(sdf.parse("2015-01-31 00:00:00").getTime)), + Literal(new Timestamp(sdf.parse("2015-03-31 22:00:00").getTime)), + roundOff, + timeZoneId = timeZoneId), -2.0) + checkEvaluation( + MonthsBetween( + Literal(new Timestamp(sdf.parse("2015-03-31 22:00:00").getTime)), + Literal(new Timestamp(sdf.parse("2015-02-28 00:00:00").getTime)), + roundOff, + timeZoneId = timeZoneId), 1.0) + } + val t = Literal(Timestamp.valueOf("2015-03-31 22:00:00")) + val tnull = Literal.create(null, TimestampType) + checkEvaluation(MonthsBetween(t, tnull, Literal.TrueLiteral, timeZoneId = timeZoneId), null) + checkEvaluation(MonthsBetween(tnull, t, Literal.TrueLiteral, timeZoneId = timeZoneId), null) + checkEvaluation( + MonthsBetween(tnull, tnull, Literal.TrueLiteral, timeZoneId = timeZoneId), null) + checkEvaluation( + MonthsBetween(t, t, Literal.create(null, BooleanType), timeZoneId = timeZoneId), null) + checkConsistencyBetweenInterpretedAndCodegen( + (time1: Expression, time2: Expression, roundOff: Expression) => + MonthsBetween(time1, time2, roundOff, timeZoneId = timeZoneId), + TimestampType, TimestampType, BooleanType) + } + } + + test("TruncTimestamp") { + def testTrunc(input: Timestamp, fmt: String, expected: Timestamp): Unit = { + checkEvaluation( + TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)), + expected) + checkEvaluation( + TruncTimestamp( + NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)), + expected) + } + + withDefaultTimeZone(TimeZoneGMT) { + val inputDate = Timestamp.valueOf("2015-07-22 05:30:06") + + Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt 
=> + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-01-01 00:00:00")) + } + + Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-01 00:00:00")) + } + + Seq("DAY", "day", "DD", "dd").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 00:00:00")) + } + + Seq("HOUR", "hour").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:00:00")) + } + + Seq("MINUTE", "minute").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:30:00")) + } + + Seq("SECOND", "second").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:30:06")) + } + + Seq("WEEK", "week").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-20 00:00:00")) + } + + Seq("QUARTER", "quarter").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-01 00:00:00")) + } + + testTrunc(inputDate, "INVALID", null) + testTrunc(inputDate, null, null) + testTrunc(null, "MON", null) + testTrunc(null, null, null) + + testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "decade", + Timestamp.valueOf("2000-01-01 00:00:00")) + testTrunc(Timestamp.valueOf("2000-03-08 11:12:13"), "century", + Timestamp.valueOf("1901-01-01 00:00:00")) + } + } + + test("from_unixtime") { + val fmt1 = "yyyy-MM-dd HH:mm:ss" + val sdf1 = new SimpleDateFormat(fmt1, Locale.US) + val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + sdf1.setTimeZone(tz) + sdf2.setTimeZone(tz) + + checkEvaluation( + FromUnixTime(Literal(0L), Literal(fmt1), timeZoneId), + sdf1.format(new Timestamp(0))) + checkEvaluation(FromUnixTime( + Literal(1000L), Literal(fmt1), timeZoneId), + sdf1.format(new Timestamp(1000000))) + checkEvaluation( + FromUnixTime(Literal(-1000L), Literal(fmt2), timeZoneId), + 
sdf2.format(new Timestamp(-1000000))) + checkEvaluation( + FromUnixTime(Literal.create(null, LongType), Literal.create(null, StringType), timeZoneId), + null) + checkEvaluation( + FromUnixTime(Literal.create(null, LongType), Literal(fmt1), timeZoneId), + null) + checkEvaluation( + FromUnixTime(Literal(1000L), Literal.create(null, StringType), timeZoneId), + null) + checkEvaluation( + FromUnixTime(Literal(0L), Literal("not a valid format"), timeZoneId), null) + + // SPARK-28072 The codegen path for non-literal input should also work + checkEvaluation( + expression = FromUnixTime( + BoundReference(ordinal = 0, dataType = LongType, nullable = true), + BoundReference(ordinal = 1, dataType = StringType, nullable = true), + timeZoneId), + expected = UTF8String.fromString(sdf1.format(new Timestamp(0))), + inputRow = InternalRow(0L, UTF8String.fromString(fmt1))) + } + } + + test("unix_timestamp") { + val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) + val fmt3 = "yy-MM-dd" + val sdf3 = new SimpleDateFormat(fmt3, Locale.US) + sdf3.setTimeZone(TimeZoneGMT) + + withDefaultTimeZone(TimeZoneGMT) { + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + sdf1.setTimeZone(tz) + sdf2.setTimeZone(tz) + + val date1 = Date.valueOf("2015-07-24") + checkEvaluation(UnixTimestamp( + Literal(sdf1.format(new Timestamp(0))), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), 0L) + checkEvaluation(UnixTimestamp( + Literal(sdf1.format(new Timestamp(1000000))), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), + 1000L) + checkEvaluation( + UnixTimestamp( + Literal(new Timestamp(1000000)), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), + 1000L) + checkEvaluation( + UnixTimestamp(Literal(date1), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), + MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) + checkEvaluation( + 
UnixTimestamp(Literal(sdf2.format(new Timestamp(-1000000))), Literal(fmt2), timeZoneId), + -1000L) + checkEvaluation(UnixTimestamp( + Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3), timeZoneId), + MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis( + DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24")), tz))) + val t1 = UnixTimestamp( + CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long] + val t2 = UnixTimestamp( + CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long] + assert(t2 - t1 <= 1) + checkEvaluation( + UnixTimestamp( + Literal.create(null, DateType), Literal.create(null, StringType), timeZoneId), + null) + checkEvaluation( + UnixTimestamp(Literal.create(null, DateType), Literal("yyyy-MM-dd HH:mm:ss"), timeZoneId), + null) + checkEvaluation( + UnixTimestamp(Literal(date1), Literal.create(null, StringType), timeZoneId), + MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) + checkEvaluation( + UnixTimestamp(Literal("2015-07-24"), Literal("not a valid format"), timeZoneId), null) + } + } + } + + test("to_unix_timestamp") { + val fmt1 = "yyyy-MM-dd HH:mm:ss" + val sdf1 = new SimpleDateFormat(fmt1, Locale.US) + val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) + val fmt3 = "yy-MM-dd" + val sdf3 = new SimpleDateFormat(fmt3, Locale.US) + sdf3.setTimeZone(TimeZoneGMT) + + withDefaultTimeZone(TimeZoneGMT) { + for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + val timeZoneId = Option(tz.getID) + sdf1.setTimeZone(tz) + sdf2.setTimeZone(tz) + + val date1 = Date.valueOf("2015-07-24") + checkEvaluation(ToUnixTimestamp( + Literal(sdf1.format(new Timestamp(0))), Literal(fmt1), timeZoneId), 0L) + checkEvaluation(ToUnixTimestamp( + Literal(sdf1.format(new Timestamp(1000000))), Literal(fmt1), timeZoneId), + 1000L) + checkEvaluation(ToUnixTimestamp( + Literal(new Timestamp(1000000)), Literal(fmt1)), + 1000L) + 
checkEvaluation( + ToUnixTimestamp(Literal(date1), Literal(fmt1), timeZoneId), + MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) + checkEvaluation( + ToUnixTimestamp(Literal(sdf2.format(new Timestamp(-1000000))), Literal(fmt2), timeZoneId), + -1000L) + checkEvaluation(ToUnixTimestamp( + Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3), timeZoneId), + MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis( + DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24")), tz))) + val t1 = ToUnixTimestamp( + CurrentTimestamp(), Literal(fmt1)).eval().asInstanceOf[Long] + val t2 = ToUnixTimestamp( + CurrentTimestamp(), Literal(fmt1)).eval().asInstanceOf[Long] + assert(t2 - t1 <= 1) + checkEvaluation(ToUnixTimestamp( + Literal.create(null, DateType), Literal.create(null, StringType), timeZoneId), null) + checkEvaluation( + ToUnixTimestamp( + Literal.create(null, DateType), Literal(fmt1), timeZoneId), + null) + checkEvaluation(ToUnixTimestamp( + Literal(date1), Literal.create(null, StringType), timeZoneId), + MILLISECONDS.toSeconds(DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1), tz))) + checkEvaluation( + ToUnixTimestamp(Literal("2015-07-24"), Literal("not a valid format"), timeZoneId), null) + + // SPARK-28072 The codegen path for non-literal input should also work + checkEvaluation( + expression = ToUnixTimestamp( + BoundReference(ordinal = 0, dataType = StringType, nullable = true), + BoundReference(ordinal = 1, dataType = StringType, nullable = true), + timeZoneId), + expected = 0L, + inputRow = InternalRow( + UTF8String.fromString(sdf1.format(new Timestamp(0))), UTF8String.fromString(fmt1))) + } + } + } + + test("to_utc_timestamp") { + def test(t: String, tz: String, expected: String): Unit = { + checkEvaluation( + ToUTCTimestamp( + Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), + Literal.create(tz, StringType)), + if (expected != null) Timestamp.valueOf(expected) else null) + 
checkEvaluation( + ToUTCTimestamp( + Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), + NonFoldableLiteral.create(tz, StringType)), + if (expected != null) Timestamp.valueOf(expected) else null) + } + withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { + test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00") + test("2015-01-24 00:00:00", "PST", "2015-01-24 08:00:00") + test(null, "UTC", null) + test("2015-07-24 00:00:00", null, null) + test(null, null, null) + } + val msg = intercept[AnalysisException] { + test("2015-07-24 00:00:00", "PST", "2015-07-24 07:00:00") + }.getMessage + assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) + } + + test("to_utc_timestamp - invalid time zone id") { + withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { + Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz => + val msg = intercept[java.time.DateTimeException] { + GenerateUnsafeProjection.generate( + ToUTCTimestamp( + Literal(Timestamp.valueOf("2015-07-24 00:00:00")), Literal(invalidTz)) :: Nil) + }.getMessage + assert(msg.contains(invalidTz)) + } + } + } + + test("from_utc_timestamp") { + def test(t: String, tz: String, expected: String): Unit = { + checkEvaluation( + FromUTCTimestamp( + Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), + Literal.create(tz, StringType)), + if (expected != null) Timestamp.valueOf(expected) else null) + checkEvaluation( + FromUTCTimestamp( + Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType), + NonFoldableLiteral.create(tz, StringType)), + if (expected != null) Timestamp.valueOf(expected) else null) + } + withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { + test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00") + test("2015-01-24 00:00:00", "PST", "2015-01-23 16:00:00") + test(null, "UTC", null) + test("2015-07-24 00:00:00", null, null) + test(null, null, null) + } + val msg = 
intercept[AnalysisException] { + test("2015-07-24 00:00:00", "PST", "2015-07-23 17:00:00") + }.getMessage + assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) + } + + test("from_utc_timestamp - invalid time zone id") { + withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { + Seq("Invalid time zone", "\"quote", "UTC*42").foreach { invalidTz => + val msg = intercept[java.time.DateTimeException] { + GenerateUnsafeProjection.generate( + FromUTCTimestamp(Literal(0), Literal(invalidTz)) :: Nil) + }.getMessage + assert(msg.contains(invalidTz)) + } + } + } + + test("creating values of TimestampType via make_timestamp") { + var makeTimestampExpr = MakeTimestamp( + Literal(2013), Literal(7), Literal(15), Literal(8), Literal(15), + Literal(Decimal(BigDecimal(23.5), 8, 6)), Some(Literal(ZoneId.systemDefault().getId))) + val expected = Timestamp.valueOf("2013-7-15 8:15:23.5") + checkEvaluation(makeTimestampExpr, expected) + checkEvaluation(makeTimestampExpr.copy(timezone = None), expected) + + checkEvaluation(makeTimestampExpr.copy(year = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(year = Literal(Int.MaxValue)), null) + + checkEvaluation(makeTimestampExpr.copy(month = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(month = Literal(13)), null) + + checkEvaluation(makeTimestampExpr.copy(day = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(day = Literal(32)), null) + + checkEvaluation(makeTimestampExpr.copy(hour = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(hour = Literal(25)), null) + + checkEvaluation(makeTimestampExpr.copy(min = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(min = Literal(65)), null) + + checkEvaluation(makeTimestampExpr.copy(sec = Literal.create(null, DecimalType(8, 6))), null) + checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(70.0), 
8, 6))), null) + + makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), + Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 00:00:00")) + checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(60.5), 8, 6))), null) + + makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), + Literal(0), Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + } + + test("milliseconds and microseconds") { + outstandingTimezonesIds.foreach { timezone => + var timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10), + Literal(0), Literal(0), Literal(Decimal(BigDecimal(10.123456789), 8, 6)), + Some(Literal(timezone))) + + checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(10123.457), 8, 3)) + checkEvaluation(Microseconds(timestamp), 10123457) + + timestamp = timestamp.copy(sec = Literal(Decimal(0.0, 8, 6))) + checkEvaluation(Milliseconds(timestamp), Decimal(0, 8, 3)) + checkEvaluation(Microseconds(timestamp), 0) + + timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(59.999999), 8, 6))) + checkEvaluation(Milliseconds(timestamp), Decimal(BigDecimal(59999.999), 8, 3)) + checkEvaluation(Microseconds(timestamp), 59999999) + + timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(60.0), 8, 6))) + checkEvaluation(Milliseconds(timestamp), Decimal(0, 8, 3)) + checkEvaluation(Microseconds(timestamp), 0) + } + } + + test("epoch") { + val zoneId = ZoneId.systemDefault() + val nanos = 123456000 + val timestamp = Epoch(MakeTimestamp( + Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0), + Literal(Decimal(nanos / DateTimeUtils.NANOS_PER_SECOND.toDouble, 8, 6)), + Some(Literal(zoneId.getId)))) + val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, nanos) + .atZone(zoneId).toInstant + val expected = Decimal(BigDecimal(nanos) / 
DateTimeUtils.NANOS_PER_SECOND + + instant.getEpochSecond + + zoneId.getRules.getOffset(instant).getTotalSeconds) + checkEvaluation(timestamp, expected) + } + + test("extract the seconds part with fraction from timestamps") { + outstandingTimezonesIds.foreach { timezone => + val timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10), + Literal(0), Literal(0), Literal(Decimal(10.123456, 8, 6)), + Some(Literal(timezone))) + + checkEvaluation(SecondWithFraction(timestamp), Decimal(10.123456, 8, 6)) + checkEvaluation( + SecondWithFraction(timestamp.copy(sec = Literal(Decimal(59000001, 8, 6)))), + Decimal(59000001, 8, 6)) + checkEvaluation( + SecondWithFraction(timestamp.copy(sec = Literal(Decimal(1, 8, 6)))), + Decimal(0.000001, 8, 6)) + } + } + + test("timestamps difference") { + val end = Instant.parse("2019-10-04T11:04:01.123456Z") + checkEvaluation(TimestampDiff(Literal(end), Literal(end)), + new CalendarInterval(0, 0)) + checkEvaluation(TimestampDiff(Literal(end), Literal(Instant.EPOCH)), + CalendarInterval.fromString("interval 18173 days " + + "11 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds")) + checkEvaluation(TimestampDiff(Literal(Instant.EPOCH), Literal(end)), + CalendarInterval.fromString("interval -18173 days " + + "-11 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds")) + checkEvaluation( + TimestampDiff( + Literal(Instant.parse("9999-12-31T23:59:59.999999Z")), + Literal(Instant.parse("0001-01-01T00:00:00Z"))), + CalendarInterval.fromString("interval 521722 weeks 4 days " + + "23 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds")) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 99189a96b2995..ca94d40dfbe8d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -19,16 +19,12 @@ 
package org.apache.spark.sql import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.Instant import java.util.Locale -import java.util.concurrent.TimeUnit import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{DoubleType, StructField, StructType} -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.sql.types.DoubleType class DateFunctionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -43,43 +39,11 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { assert(d0 <= d1 && d1 <= d2 && d2 <= d3 && d3 - d0 <= 1) } - test("function current_timestamp and now") { - val df1 = Seq((1, 2), (3, 1)).toDF("a", "b") - checkAnswer(df1.select(countDistinct(current_timestamp())), Row(1)) - - // Execution in one query should return the same value - checkAnswer(sql("""SELECT CURRENT_TIMESTAMP() = CURRENT_TIMESTAMP()"""), Row(true)) - - // Current timestamp should return the current timestamp ... 
- val before = System.currentTimeMillis - val got = sql("SELECT CURRENT_TIMESTAMP()").collect().head.getTimestamp(0).getTime - val after = System.currentTimeMillis - assert(got >= before && got <= after) - - // Now alias - checkAnswer(sql("""SELECT CURRENT_TIMESTAMP() = NOW()"""), Row(true)) - } - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val sdfDate = new SimpleDateFormat("yyyy-MM-dd", Locale.US) val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime) - test("timestamp comparison with date strings") { - val df = Seq( - (1, Timestamp.valueOf("2015-01-01 00:00:00")), - (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t") - - checkAnswer( - df.select("t").filter($"t" <= "2014-06-01"), - Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil) - - - checkAnswer( - df.select("t").filter($"t" >= "2014-06-01"), - Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil) - } - test("date comparison with date strings") { val df = Seq( (1, Date.valueOf("2015-01-01")), @@ -95,18 +59,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Row(Date.valueOf("2015-01-01")) :: Nil) } - test("date format") { - val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") - - checkAnswer( - df.select(date_format($"a", "y"), date_format($"b", "y"), date_format($"c", "y")), - Row("2015", "2015", "2013")) - - checkAnswer( - df.selectExpr("date_format(a, 'y')", "date_format(b, 'y')", "date_format(c, 'y')"), - Row("2015", "2015", "2013")) - } - test("year") { val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") @@ -169,42 +121,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Row(98, 98, 98)) } - test("hour") { - val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") - - checkAnswer( - df.select(hour($"a"), hour($"b"), hour($"c")), - Row(0, 13, 13)) - - checkAnswer( - df.selectExpr("hour(a)", "hour(b)", "hour(c)"), - Row(0, 13, 13)) - } - - 
test("minute") { - val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") - - checkAnswer( - df.select(minute($"a"), minute($"b"), minute($"c")), - Row(0, 10, 10)) - - checkAnswer( - df.selectExpr("minute(a)", "minute(b)", "minute(c)"), - Row(0, 10, 10)) - } - - test("second") { - val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") - - checkAnswer( - df.select(second($"a"), second($"b"), second($"c")), - Row(0, 15, 15)) - - checkAnswer( - df.selectExpr("second(a)", "second(b)", "second(c)"), - Row(0, 15, 15)) - } - test("weekofyear") { val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") @@ -285,38 +201,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(Date.valueOf("2015-05-31")), Row(Date.valueOf("2015-06-01")))) } - test("time_add") { - val t1 = Timestamp.valueOf("2015-07-31 23:59:59") - val t2 = Timestamp.valueOf("2015-12-31 00:00:00") - val d1 = Date.valueOf("2015-07-31") - val d2 = Date.valueOf("2015-12-31") - val i = new CalendarInterval(2, 2000000L) - val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d") - checkAnswer( - df.selectExpr(s"d + $i"), - Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2016-02-29")))) - checkAnswer( - df.selectExpr(s"t + $i"), - Seq(Row(Timestamp.valueOf("2015-10-01 00:00:01")), - Row(Timestamp.valueOf("2016-02-29 00:00:02")))) - } - - test("time_sub") { - val t1 = Timestamp.valueOf("2015-10-01 00:00:01") - val t2 = Timestamp.valueOf("2016-02-29 00:00:02") - val d1 = Date.valueOf("2015-09-30") - val d2 = Date.valueOf("2016-02-29") - val i = new CalendarInterval(2, 2000000L) - val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d") - checkAnswer( - df.selectExpr(s"d - $i"), - Seq(Row(Date.valueOf("2015-07-29")), Row(Date.valueOf("2015-12-28")))) - checkAnswer( - df.selectExpr(s"t - $i"), - Seq(Row(Timestamp.valueOf("2015-07-31 23:59:59")), - Row(Timestamp.valueOf("2015-12-29 00:00:00")))) - } - test("function add_months") { val d1 = Date.valueOf("2015-08-31") val d2 = 
Date.valueOf("2015-02-28") @@ -332,25 +216,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2015-03-28")))) } - test("function months_between") { - val d1 = Date.valueOf("2015-07-31") - val d2 = Date.valueOf("2015-02-16") - val t1 = Timestamp.valueOf("2014-09-30 23:30:00") - val t2 = Timestamp.valueOf("2015-09-16 12:00:00") - val s1 = "2014-09-15 11:30:00" - val s2 = "2015-10-01 00:00:00" - val df = Seq((t1, d1, s1), (t2, d2, s2)).toDF("t", "d", "s") - checkAnswer(df.select(months_between(col("t"), col("d"))), Seq(Row(-10.0), Row(7.0))) - checkAnswer(df.selectExpr("months_between(t, s)"), Seq(Row(0.5), Row(-0.5))) - checkAnswer(df.selectExpr("months_between(t, s, true)"), Seq(Row(0.5), Row(-0.5))) - Seq(true, false).foreach { roundOff => - checkAnswer(df.select(months_between(col("t"), col("d"), roundOff)), - Seq(Row(-10.0), Row(7.0))) - checkAnswer(df.withColumn("r", lit(false)).selectExpr("months_between(t, s, r)"), - Seq(Row(0.5), Row(-0.5))) - } - } - test("function last_day") { val df1 = Seq((1, "2015-07-23"), (2, "2015-07-24")).toDF("i", "d") val df2 = Seq((1, "2015-07-23 00:11:22"), (2, "2015-07-24 11:22:33")).toDF("i", "t") @@ -461,236 +326,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(Date.valueOf("2010-01-01")), Row(Date.valueOf("2010-01-01")))) } - test("function date_trunc") { - val df = Seq( - (1, Timestamp.valueOf("2015-07-22 10:01:40.123456")), - (2, Timestamp.valueOf("2014-12-31 05:29:06.123456"))).toDF("i", "t") - - checkAnswer( - df.select(date_trunc("YY", col("t"))), - Seq(Row(Timestamp.valueOf("2015-01-01 00:00:00")), - Row(Timestamp.valueOf("2014-01-01 00:00:00")))) - - checkAnswer( - df.selectExpr("date_trunc('MONTH', t)"), - Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")), - Row(Timestamp.valueOf("2014-12-01 00:00:00")))) - - checkAnswer( - df.selectExpr("date_trunc('DAY', t)"), - Seq(Row(Timestamp.valueOf("2015-07-22 
00:00:00")), - Row(Timestamp.valueOf("2014-12-31 00:00:00")))) - - checkAnswer( - df.selectExpr("date_trunc('HOUR', t)"), - Seq(Row(Timestamp.valueOf("2015-07-22 10:00:00")), - Row(Timestamp.valueOf("2014-12-31 05:00:00")))) - - checkAnswer( - df.selectExpr("date_trunc('MINUTE', t)"), - Seq(Row(Timestamp.valueOf("2015-07-22 10:01:00")), - Row(Timestamp.valueOf("2014-12-31 05:29:00")))) - - checkAnswer( - df.selectExpr("date_trunc('SECOND', t)"), - Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40")), - Row(Timestamp.valueOf("2014-12-31 05:29:06")))) - - checkAnswer( - df.selectExpr("date_trunc('WEEK', t)"), - Seq(Row(Timestamp.valueOf("2015-07-20 00:00:00")), - Row(Timestamp.valueOf("2014-12-29 00:00:00")))) - - checkAnswer( - df.selectExpr("date_trunc('QUARTER', t)"), - Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")), - Row(Timestamp.valueOf("2014-10-01 00:00:00")))) - - checkAnswer( - df.selectExpr("date_trunc('MILLISECOND', t)"), - Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40.123")), - Row(Timestamp.valueOf("2014-12-31 05:29:06.123")))) - - checkAnswer( - df.selectExpr("date_trunc('DECADE', t)"), - Seq(Row(Timestamp.valueOf("2010-01-01 00:00:00")), - Row(Timestamp.valueOf("2010-01-01 00:00:00")))) - - Seq("century", "millennium").foreach { level => - checkAnswer( - df.selectExpr(s"date_trunc('$level', t)"), - Seq(Row(Timestamp.valueOf("2001-01-01 00:00:00")), - Row(Timestamp.valueOf("2001-01-01 00:00:00")))) - } - } - - test("from_unixtime") { - val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2, Locale.US) - val fmt3 = "yy-MM-dd HH-mm-ss" - val sdf3 = new SimpleDateFormat(fmt3, Locale.US) - val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b") - checkAnswer( - df.select(from_unixtime(col("a"))), - Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000))))) - checkAnswer( - 
df.select(from_unixtime(col("a"), fmt2)), - Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000))))) - checkAnswer( - df.select(from_unixtime(col("a"), fmt3)), - Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000))))) - checkAnswer( - df.selectExpr("from_unixtime(a)"), - Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000))))) - checkAnswer( - df.selectExpr(s"from_unixtime(a, '$fmt2')"), - Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000))))) - checkAnswer( - df.selectExpr(s"from_unixtime(a, '$fmt3')"), - Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000))))) - } - - private def secs(millis: Long): Long = TimeUnit.MILLISECONDS.toSeconds(millis) - - test("unix_timestamp") { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - checkAnswer(df.select(unix_timestamp(col("ts"))), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.select(unix_timestamp(col("ss"))), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.select(unix_timestamp(col("d"), fmt)), Seq( - Row(secs(date1.getTime)), Row(secs(date2.getTime)))) - checkAnswer(df.select(unix_timestamp(col("s"), fmt)), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr("unix_timestamp(ts)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr("unix_timestamp(ss)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - 
checkAnswer(df.selectExpr(s"unix_timestamp(d, '$fmt')"), Seq( - Row(secs(date1.getTime)), Row(secs(date2.getTime)))) - checkAnswer(df.selectExpr(s"unix_timestamp(s, '$fmt')"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - - val x1 = "2015-07-24 10:00:00" - val x2 = "2015-25-07 02:02:02" - val x3 = "2015-07-24 25:02:02" - val x4 = "2015-24-07 26:02:02" - val ts3 = Timestamp.valueOf("2015-07-24 02:25:02") - val ts4 = Timestamp.valueOf("2015-07-24 00:10:00") - - val df1 = Seq(x1, x2, x3, x4).toDF("x") - checkAnswer(df1.select(unix_timestamp(col("x"))), Seq( - Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) - checkAnswer(df1.selectExpr("unix_timestamp(x)"), Seq( - Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) - checkAnswer(df1.select(unix_timestamp(col("x"), "yyyy-dd-MM HH:mm:ss")), Seq( - Row(null), Row(secs(ts2.getTime)), Row(null), Row(null))) - checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( - Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - - // invalid format - checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')"), Seq( - Row(null), Row(null), Row(null), Row(null))) - - // february - val y1 = "2016-02-29" - val y2 = "2017-02-29" - val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") - val df2 = Seq(y1, y2).toDF("y") - checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq( - Row(secs(ts5.getTime)), Row(null))) - - val now = sql("select unix_timestamp()").collect().head.getLong(0) - checkAnswer( - sql(s"select cast ($now as timestamp)"), - Row(new java.util.Date(TimeUnit.SECONDS.toMillis(now)))) - } - - test("to_unix_timestamp") { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 
02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - checkAnswer(df.selectExpr("to_unix_timestamp(ts)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr("to_unix_timestamp(ss)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr(s"to_unix_timestamp(d, '$fmt')"), Seq( - Row(secs(date1.getTime)), Row(secs(date2.getTime)))) - checkAnswer(df.selectExpr(s"to_unix_timestamp(s, '$fmt')"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - - val x1 = "2015-07-24 10:00:00" - val x2 = "2015-25-07 02:02:02" - val x3 = "2015-07-24 25:02:02" - val x4 = "2015-24-07 26:02:02" - val ts3 = Timestamp.valueOf("2015-07-24 02:25:02") - val ts4 = Timestamp.valueOf("2015-07-24 00:10:00") - - val df1 = Seq(x1, x2, x3, x4).toDF("x") - checkAnswer(df1.selectExpr("to_unix_timestamp(x)"), Seq( - Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) - checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( - Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - - // february - val y1 = "2016-02-29" - val y2 = "2017-02-29" - val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") - val df2 = Seq(y1, y2).toDF("y") - checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq( - Row(secs(ts5.getTime)), Row(null))) - - // invalid format - checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq( - Row(null), Row(null), Row(null), Row(null))) - } - - - test("to_timestamp") { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") - val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ts1m = 
Timestamp.valueOf("2015-07-24 10:00:00.5") - val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - - checkAnswer(df.select(to_timestamp(col("ss"))), - df.select(unix_timestamp(col("ss")).cast("timestamp"))) - checkAnswer(df.select(to_timestamp(col("ss"))), Seq( - Row(ts1), Row(ts2))) - checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq( - Row(ts1m), Row(ts2m))) - checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq( - Row(ts1), Row(ts2))) - checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq( - Row(ts_date1), Row(ts_date2))) - } - test("datediff") { val df = Seq( (Date.valueOf("2015-07-24"), Timestamp.valueOf("2015-07-24 01:00:00"), @@ -704,100 +339,6 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("datediff(a, d)"), Seq(Row(1), Row(1))) } - test("from_utc_timestamp with literal zone") { - val df = Seq( - (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"), - (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00") - ).toDF("a", "b") - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - checkAnswer( - df.select(from_utc_timestamp(col("a"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-23 17:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - checkAnswer( - df.select(from_utc_timestamp(col("b"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-23 17:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - } - val msg = intercept[AnalysisException] { - df.select(from_utc_timestamp(col("a"), "PST")).collect() - }.getMessage - assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) - } - - test("from_utc_timestamp with column zone") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - val df = Seq( - (Timestamp.valueOf("2015-07-24 
00:00:00"), "2015-07-24 00:00:00", "CET"), - (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "PST") - ).toDF("a", "b", "c") - checkAnswer( - df.select(from_utc_timestamp(col("a"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 02:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - checkAnswer( - df.select(from_utc_timestamp(col("b"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 02:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - } - } - - test("to_utc_timestamp with literal zone") { - val df = Seq( - (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"), - (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00") - ).toDF("a", "b") - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - checkAnswer( - df.select(to_utc_timestamp(col("a"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-25 07:00:00")))) - checkAnswer( - df.select(to_utc_timestamp(col("b"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-25 07:00:00")))) - } - val msg = intercept[AnalysisException] { - df.select(to_utc_timestamp(col("a"), "PST")).collect() - }.getMessage - assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) - } - - test("to_utc_timestamp with column zone") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - val df = Seq( - (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "PST"), - (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "CET") - ).toDF("a", "b", "c") - checkAnswer( - df.select(to_utc_timestamp(col("a"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-24 22:00:00")))) - checkAnswer( - df.select(to_utc_timestamp(col("b"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-24 22:00:00")))) - } - } - - - test("to_timestamp with 
microseconds precision") { - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") { - val timestamp = "1970-01-01T00:00:00.123456Z" - val df = Seq(timestamp).toDF("t") - checkAnswer(df.select(to_timestamp($"t", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSX")), - Seq(Row(Instant.parse(timestamp)))) - } - } - test("handling null field by date_part") { val input = Seq(Date.valueOf("2019-09-20")).toDF("d") Seq("date_part(null, d)", "date_part(null, date'2019-09-20')").foreach { expr => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampFunctionsSuite.scala new file mode 100644 index 0000000000000..159f69116a879 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampFunctionsSuite.scala @@ -0,0 +1,478 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql
+
+import java.sql.{Date, Timestamp}
+import java.text.SimpleDateFormat
+import java.time.Instant
+import java.util.Locale
+import java.util.concurrent.TimeUnit
+
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.unsafe.types.CalendarInterval
+
+/**
+ * Query-level tests for timestamp-related SQL functions.
+ *
+ * Most functions are checked twice: once through the typed helpers in
+ * `org.apache.spark.sql.functions` (`df.select(...)`) and once through SQL
+ * expression strings (`df.selectExpr(...)` / `sql(...)`).
+ */
+class TimestampFunctionsSuite extends QueryTest with SharedSparkSession {
+  import testImplicits._
+
+  test("function current_timestamp and now") {
+    val df1 = Seq((1, 2), (3, 1)).toDF("a", "b")
+    // Every row must see the same timestamp, hence a single distinct value.
+    checkAnswer(df1.select(countDistinct(current_timestamp())), Row(1))
+
+    // Execution in one query should return the same value
+    checkAnswer(sql("""SELECT CURRENT_TIMESTAMP() = CURRENT_TIMESTAMP()"""), Row(true))
+
+    // Current timestamp should return the current timestamp ...
+    val before = System.currentTimeMillis
+    val got = sql("SELECT CURRENT_TIMESTAMP()").collect().head.getTimestamp(0).getTime
+    val after = System.currentTimeMillis
+    assert(got >= before && got <= after)
+
+    // Now alias
+    checkAnswer(sql("""SELECT CURRENT_TIMESTAMP() = NOW()"""), Row(true))
+  }
+
+  // Fixtures shared by the hour/minute/second tests below.
+  val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
+  val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
+  val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
+
+  test("timestamp comparison with date strings") {
+    val df = Seq(
+      (1, Timestamp.valueOf("2015-01-01 00:00:00")),
+      (2, Timestamp.valueOf("2014-01-01 00:00:00"))).toDF("i", "t")
+
+    checkAnswer(
+      df.select("t").filter($"t" <= "2014-06-01"),
+      Row(Timestamp.valueOf("2014-01-01 00:00:00")) :: Nil)
+
+    checkAnswer(
+      df.select("t").filter($"t" >= "2014-06-01"),
+      Row(Timestamp.valueOf("2015-01-01 00:00:00")) :: Nil)
+  }
+
+  test("hour") {
+    // a: date column, b: the same instant formatted as a string, c: timestamp column.
+    val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(hour($"a"), hour($"b"), hour($"c")),
+      Row(0, 13, 13))
+
+    checkAnswer(
+      df.selectExpr("hour(a)", "hour(b)", "hour(c)"),
+      Row(0, 13, 13))
+  }
+
+  test("minute") {
+    val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(minute($"a"), minute($"b"), minute($"c")),
+      Row(0, 10, 10))
+
+    checkAnswer(
+      df.selectExpr("minute(a)", "minute(b)", "minute(c)"),
+      Row(0, 10, 10))
+  }
+
+  test("second") {
+    val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+    checkAnswer(
+      df.select(second($"a"), second($"b"), second($"c")),
+      Row(0, 15, 15))
+
+    checkAnswer(
+      df.selectExpr("second(a)", "second(b)", "second(c)"),
+      Row(0, 15, 15))
+  }
+
+  test("time_add") {
+    val t1 = Timestamp.valueOf("2015-07-31 23:59:59")
+    val t2 = Timestamp.valueOf("2015-12-31 00:00:00")
+    val d1 = Date.valueOf("2015-07-31")
+    val d2 = Date.valueOf("2015-12-31")
+    // The interval is spliced into the SQL text through its toString; judging by the
+    // expected values below it amounts to a shift of two months and two seconds.
+    val i = new CalendarInterval(2, 2000000L)
+    val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d")
+    checkAnswer(
+      df.selectExpr(s"d + $i"),
+      Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2016-02-29"))))
+    checkAnswer(
+      df.selectExpr(s"t + $i"),
+      Seq(Row(Timestamp.valueOf("2015-10-01 00:00:01")),
+        Row(Timestamp.valueOf("2016-02-29 00:00:02"))))
+  }
+
+  test("time_sub") {
+    val t1 = Timestamp.valueOf("2015-10-01 00:00:01")
+    val t2 = Timestamp.valueOf("2016-02-29 00:00:02")
+    val d1 = Date.valueOf("2015-09-30")
+    val d2 = Date.valueOf("2016-02-29")
+    val i = new CalendarInterval(2, 2000000L)
+    val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d")
+    checkAnswer(
+      df.selectExpr(s"d - $i"),
+      Seq(Row(Date.valueOf("2015-07-29")), Row(Date.valueOf("2015-12-28"))))
+    checkAnswer(
+      df.selectExpr(s"t - $i"),
+      Seq(Row(Timestamp.valueOf("2015-07-31 23:59:59")),
+        Row(Timestamp.valueOf("2015-12-29 00:00:00"))))
+  }
+
+  test("function months_between") {
+    val d1 = Date.valueOf("2015-07-31")
+    val d2 = Date.valueOf("2015-02-16")
+    val t1 = Timestamp.valueOf("2014-09-30 23:30:00")
+    val t2 = Timestamp.valueOf("2015-09-16 12:00:00")
+    val s1 = "2014-09-15 11:30:00"
+    val s2 = "2015-10-01 00:00:00"
+    val df = Seq((t1, d1, s1), (t2, d2, s2)).toDF("t", "d", "s")
+    checkAnswer(df.select(months_between(col("t"), col("d"))), Seq(Row(-10.0), Row(7.0)))
+    checkAnswer(df.selectExpr("months_between(t, s)"), Seq(Row(0.5), Row(-0.5)))
+    checkAnswer(df.selectExpr("months_between(t, s, true)"), Seq(Row(0.5), Row(-0.5)))
+    Seq(true, false).foreach { roundOff =>
+      checkAnswer(df.select(months_between(col("t"), col("d"), roundOff)),
+        Seq(Row(-10.0), Row(7.0)))
+      // Pass the flag as a column as well, so both values of roundOff are exercised
+      // through the SQL form (the expected results are the same for either value).
+      checkAnswer(df.withColumn("r", lit(roundOff)).selectExpr("months_between(t, s, r)"),
+        Seq(Row(0.5), Row(-0.5)))
+    }
+  }
+
+  test("function date_trunc") {
+    val df = Seq(
+      (1, Timestamp.valueOf("2015-07-22 10:01:40.123456")),
+      (2, Timestamp.valueOf("2014-12-31 05:29:06.123456"))).toDF("i", "t")
+
+    checkAnswer(
+      df.select(date_trunc("YY", col("t"))),
+      Seq(Row(Timestamp.valueOf("2015-01-01 00:00:00")),
+        Row(Timestamp.valueOf("2014-01-01 00:00:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('MONTH', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")),
+        Row(Timestamp.valueOf("2014-12-01 00:00:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('DAY', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-22 00:00:00")),
+        Row(Timestamp.valueOf("2014-12-31 00:00:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('HOUR', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-22 10:00:00")),
+        Row(Timestamp.valueOf("2014-12-31 05:00:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('MINUTE', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-22 10:01:00")),
+        Row(Timestamp.valueOf("2014-12-31 05:29:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('SECOND', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40")),
+        Row(Timestamp.valueOf("2014-12-31 05:29:06"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('WEEK', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-20 00:00:00")),
+        Row(Timestamp.valueOf("2014-12-29 00:00:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('QUARTER', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")),
+        Row(Timestamp.valueOf("2014-10-01 00:00:00"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('MILLISECOND', t)"),
+      Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40.123")),
+        Row(Timestamp.valueOf("2014-12-31 05:29:06.123"))))
+
+    checkAnswer(
+      df.selectExpr("date_trunc('DECADE', t)"),
+      Seq(Row(Timestamp.valueOf("2010-01-01 00:00:00")),
+        Row(Timestamp.valueOf("2010-01-01 00:00:00"))))
+
+    // Both sample timestamps are expected to truncate to the same instant at these levels.
+    Seq("century", "millennium").foreach { level =>
+      checkAnswer(
+        df.selectExpr(s"date_trunc('$level', t)"),
+        Seq(Row(Timestamp.valueOf("2001-01-01 00:00:00")),
+          Row(Timestamp.valueOf("2001-01-01 00:00:00"))))
+    }
+  }
+
+  test("from_unixtime") {
+    // Expected strings are produced with SimpleDateFormat using the same patterns.
+    val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US)
+    val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS"
+    val sdf2 = new SimpleDateFormat(fmt2, Locale.US)
+    val fmt3 = "yy-MM-dd HH-mm-ss"
+    val sdf3 = new SimpleDateFormat(fmt3, Locale.US)
+    val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b")
+    checkAnswer(
+      df.select(from_unixtime(col("a"))),
+      Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000)))))
+    checkAnswer(
+      df.select(from_unixtime(col("a"), fmt2)),
+      Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000)))))
+    checkAnswer(
+      df.select(from_unixtime(col("a"), fmt3)),
+      Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000)))))
+    checkAnswer(
+      df.selectExpr("from_unixtime(a)"),
+      Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000)))))
+    checkAnswer(
+      df.selectExpr(s"from_unixtime(a, '$fmt2')"),
+      Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000)))))
+    checkAnswer(
+      df.selectExpr(s"from_unixtime(a, '$fmt3')"),
+      Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000)))))
+  }
+
+  /** Converts a millisecond count to whole seconds via `TimeUnit.MILLISECONDS.toSeconds`. */
+  private def secs(millis: Long): Long = TimeUnit.MILLISECONDS.toSeconds(millis)
+
+  test("unix_timestamp") {
+    val date1 = Date.valueOf("2015-07-24")
+    val date2 = Date.valueOf("2015-07-25")
+    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3")
+    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2")
+    val s1 = "2015/07/24 10:00:00.5"
+    val s2 = "2015/07/25 02:02:02.6"
+    val ss1 = "2015-07-24 10:00:00"
+    val ss2 = "2015-07-25 02:02:02"
+    val fmt = "yyyy/MM/dd HH:mm:ss.S"
+    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+    checkAnswer(df.select(unix_timestamp(col("ts"))), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.select(unix_timestamp(col("ss"))), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.select(unix_timestamp(col("d"), fmt)), Seq(
+      Row(secs(date1.getTime)), Row(secs(date2.getTime))))
+    checkAnswer(df.select(unix_timestamp(col("s"), fmt)), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.selectExpr("unix_timestamp(ts)"), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.selectExpr("unix_timestamp(ss)"), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.selectExpr(s"unix_timestamp(d, '$fmt')"), Seq(
+      Row(secs(date1.getTime)), Row(secs(date2.getTime))))
+    checkAnswer(df.selectExpr(s"unix_timestamp(s, '$fmt')"), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
+    // Strings with swapped or out-of-range fields: a row is expected to be null
+    // whenever it cannot be parsed with the pattern in use.
+    val x1 = "2015-07-24 10:00:00"
+    val x2 = "2015-25-07 02:02:02"
+    val x3 = "2015-07-24 25:02:02"
+    val x4 = "2015-24-07 26:02:02"
+    val ts3 = Timestamp.valueOf("2015-07-24 02:25:02")
+    val ts4 = Timestamp.valueOf("2015-07-24 00:10:00")
+
+    val df1 = Seq(x1, x2, x3, x4).toDF("x")
+    checkAnswer(df1.select(unix_timestamp(col("x"))), Seq(
+      Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
+    checkAnswer(df1.selectExpr("unix_timestamp(x)"), Seq(
+      Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
+    checkAnswer(df1.select(unix_timestamp(col("x"), "yyyy-dd-MM HH:mm:ss")), Seq(
+      Row(null), Row(secs(ts2.getTime)), Row(null), Row(null)))
+    checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(
+      Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null)))
+
+    // invalid format
+    checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')"), Seq(
+      Row(null), Row(null), Row(null), Row(null)))
+
+    // february
+    val y1 = "2016-02-29"
+    val y2 = "2017-02-29"
+    val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
+    val df2 = Seq(y1, y2).toDF("y")
+    checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
+      Row(secs(ts5.getTime)), Row(null)))
+
+    // The no-argument form must round-trip through a cast back to timestamp.
+    val now = sql("select unix_timestamp()").collect().head.getLong(0)
+    checkAnswer(
+      sql(s"select cast ($now as timestamp)"),
+      Row(new java.util.Date(TimeUnit.SECONDS.toMillis(now))))
+  }
+
+  test("to_unix_timestamp") {
+    val date1 = Date.valueOf("2015-07-24")
+    val date2 = Date.valueOf("2015-07-25")
+    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3")
+    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2")
+    val s1 = "2015/07/24 10:00:00.5"
+    val s2 = "2015/07/25 02:02:02.6"
+    val ss1 = "2015-07-24 10:00:00"
+    val ss2 = "2015-07-25 02:02:02"
+    val fmt = "yyyy/MM/dd HH:mm:ss.S"
+    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+    checkAnswer(df.selectExpr("to_unix_timestamp(ts)"), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.selectExpr("to_unix_timestamp(ss)"), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+    checkAnswer(df.selectExpr(s"to_unix_timestamp(d, '$fmt')"), Seq(
+      Row(secs(date1.getTime)), Row(secs(date2.getTime))))
+    checkAnswer(df.selectExpr(s"to_unix_timestamp(s, '$fmt')"), Seq(
+      Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
+    val x1 = "2015-07-24 10:00:00"
+    val x2 = "2015-25-07 02:02:02"
+    val x3 = "2015-07-24 25:02:02"
+    val x4 = "2015-24-07 26:02:02"
+    val ts3 = Timestamp.valueOf("2015-07-24 02:25:02")
+    val ts4 = Timestamp.valueOf("2015-07-24 00:10:00")
+
+    val df1 = Seq(x1, x2, x3, x4).toDF("x")
+    checkAnswer(df1.selectExpr("to_unix_timestamp(x)"), Seq(
+      Row(secs(ts1.getTime)), Row(null), Row(null), Row(null)))
+    checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq(
+      Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null)))
+
+    // february
+    val y1 = "2016-02-29"
+    val y2 = "2017-02-29"
+    val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
+    val df2 = Seq(y1, y2).toDF("y")
+    // Exercise to_unix_timestamp itself here; this check previously called
+    // unix_timestamp, which only repeated the assertion of the preceding test.
+    checkAnswer(df2.selectExpr("to_unix_timestamp(y, 'yyyy-MM-dd')"), Seq(
+      Row(secs(ts5.getTime)), Row(null)))
+
+    // invalid format
+    checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq(
+      Row(null), Row(null), Row(null), Row(null)))
+  }
+
+  test("to_timestamp") {
+    val date1 = Date.valueOf("2015-07-24")
+    val date2 = Date.valueOf("2015-07-25")
+    val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
+    val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
+    val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
+    val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
+    val s1 = "2015/07/24 10:00:00.5"
+    val s2 = "2015/07/25 02:02:02.6"
+    val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
+    val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
+    val ss1 = "2015-07-24 10:00:00"
+    val ss2 = "2015-07-25 02:02:02"
+    val fmt = "yyyy/MM/dd HH:mm:ss.S"
+    val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+
+    // With the default pattern, to_timestamp must agree with unix_timestamp cast to timestamp.
+    checkAnswer(df.select(to_timestamp(col("ss"))),
+      df.select(unix_timestamp(col("ss")).cast("timestamp")))
+    checkAnswer(df.select(to_timestamp(col("ss"))), Seq(
+      Row(ts1), Row(ts2)))
+    checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(
+      Row(ts1m), Row(ts2m)))
+    checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(
+      Row(ts1), Row(ts2)))
+    checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq(
+      Row(ts_date1), Row(ts_date2)))
+  }
+
+  test("from_utc_timestamp with literal zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+    ).toDF("a", "b")
+    withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
+      checkAnswer(
+        df.select(from_utc_timestamp(col("a"), "PST")),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-23 17:00:00")),
+          Row(Timestamp.valueOf("2015-07-24 17:00:00"))))
+      checkAnswer(
+        df.select(from_utc_timestamp(col("b"), "PST")),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-23 17:00:00")),
+          Row(Timestamp.valueOf("2015-07-24 17:00:00"))))
+    }
+    // Without the conf override the call must fail analysis and name the conf key.
+    val msg = intercept[AnalysisException] {
+      df.select(from_utc_timestamp(col("a"), "PST")).collect()
+    }.getMessage
+    assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key))
+  }
+
+  test("from_utc_timestamp with column zone") {
+    withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
+      val df = Seq(
+        (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "CET"),
+        (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "PST")
+      ).toDF("a", "b", "c")
+      checkAnswer(
+        df.select(from_utc_timestamp(col("a"), col("c"))),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-24 02:00:00")),
+          Row(Timestamp.valueOf("2015-07-24 17:00:00"))))
+      checkAnswer(
+        df.select(from_utc_timestamp(col("b"), col("c"))),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-24 02:00:00")),
+          Row(Timestamp.valueOf("2015-07-24 17:00:00"))))
+    }
+  }
+
+  test("to_utc_timestamp with literal zone") {
+    val df = Seq(
+      (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+      (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+    ).toDF("a", "b")
+    withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
+      checkAnswer(
+        df.select(to_utc_timestamp(col("a"), "PST")),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+          Row(Timestamp.valueOf("2015-07-25 07:00:00"))))
+      checkAnswer(
+        df.select(to_utc_timestamp(col("b"), "PST")),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+          Row(Timestamp.valueOf("2015-07-25 07:00:00"))))
+    }
+    // Without the conf override the call must fail analysis and name the conf key.
+    val msg = intercept[AnalysisException] {
+      df.select(to_utc_timestamp(col("a"), "PST")).collect()
+    }.getMessage
+    assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key))
+  }
+
+  test("to_utc_timestamp with column zone") {
+    withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
+      val df = Seq(
+        (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "PST"),
+        (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "CET")
+      ).toDF("a", "b", "c")
+      checkAnswer(
+        df.select(to_utc_timestamp(col("a"), col("c"))),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+          Row(Timestamp.valueOf("2015-07-24 22:00:00"))))
+      checkAnswer(
+        df.select(to_utc_timestamp(col("b"), col("c"))),
+        Seq(
+          Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+          Row(Timestamp.valueOf("2015-07-24 22:00:00"))))
+    }
+  }
+
+  test("to_timestamp with microseconds precision") {
+    // With the Java 8 API conf enabled the expected row holds a java.time.Instant.
+    withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") {
+      val timestamp = "1970-01-01T00:00:00.123456Z"
+      val df = Seq(timestamp).toDF("t")
+      checkAnswer(df.select(to_timestamp($"t", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSX")),
+        Seq(Row(Instant.parse(timestamp))))
+    }
+  }
+}