
Commit 9c08fdc

[SQL] Make date/time functions more consistent with other database systems.

This renames some of the functions that were just merged, in order to be more consistent with other databases.

Also did some small cleanups.
rxin committed Jul 19, 2015
1 parent 04c1b49 commit 9c08fdc
Showing 8 changed files with 260 additions and 280 deletions.
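
A minimal usage sketch of the renamed helpers from Scala. It assumes the Scala side of org.apache.spark.sql.functions exposes the same dayofmonth, dayofyear and weekofyear entry points that the PySpark wrappers below delegate to via sc._jvm.functions; the input date and expected values mirror the doctests in python/pyspark/sql/functions.py.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions.{col, dayofmonth, dayofyear, weekofyear}

object DateFunctionRenameDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("demo").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Same single-row input as the PySpark doctests: a string column holding '2015-04-08'.
    val df = Seq(Tuple1("2015-04-08")).toDF("a")

    // The old snake_case names (day_of_month, day_in_year, week_of_year) are replaced by
    // the concatenated forms used by other databases.
    df.select(
      dayofmonth(col("a")).alias("day"),   // expected 8
      dayofyear(col("a")).alias("doy"),    // expected 98
      weekofyear(col("a")).alias("week")   // expected 15
    ).show()

    sc.stop()
  }
}
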
28 changes: 19 additions & 9 deletions python/pyspark/sql/functions.py
@@ -55,6 +55,16 @@

__all__ += ['lag', 'lead', 'ntile']

__all__ += [
'date_format',
'year',
'quarter',
'month',
'dayofmonth',
'dayofyear',
'hour',
'weekofyear']


def _create_function(name, doc=""):
""" Create a function for aggregator by name"""
@@ -725,29 +735,29 @@ def day(col):


@since(1.5)
def day_of_month(col):
def dayofmonth(col):
"""
Extract the day of the month of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
>>> df.select(day_of_month('a').alias('day')).collect()
>>> df.select(dayofmonth('a').alias('day')).collect()
[Row(day=8)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.day_of_month(col))
return Column(sc._jvm.functions.dayofmonth(col))


@since(1.5)
def day_in_year(col):
def dayofyear(col):
"""
Extract the day of the year of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
>>> df.select(day_in_year('a').alias('day')).collect()
>>> df.select(dayofyear('a').alias('day')).collect()
[Row(day=98)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.day_in_year(col))
return Column(sc._jvm.functions.dayofyear(col))


@since(1.5)
@@ -790,16 +800,16 @@ def second(col):


@since(1.5)
def week_of_year(col):
def weekofyear(col):
"""
Extract the week number of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
>>> df.select(week_of_year('a').alias('week')).collect()
>>> df.select(weekofyear('a').alias('week')).collect()
[Row(week=15)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.week_of_year(col))
return Column(sc._jvm.functions.weekofyear(col))


class UserDefinedFunction(object):

@@ -183,15 +183,15 @@ object FunctionRegistry {
expression[CurrentDate]("current_date"),
expression[CurrentTimestamp]("current_timestamp"),
expression[DateFormatClass]("date_format"),
expression[Day]("day"),
expression[DayInYear]("day_in_year"),
expression[Day]("day_of_month"),
expression[DayOfMonth]("day"),
expression[DayOfYear]("dayofyear"),
expression[DayOfMonth]("dayofmonth"),
expression[Hour]("hour"),
expression[Month]("month"),
expression[Minute]("minute"),
expression[Quarter]("quarter"),
expression[Second]("second"),
expression[WeekOfYear]("week_of_year"),
expression[WeekOfYear]("weekofyear"),
expression[Year]("year")

)
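
After this hunk, "day" and "dayofmonth" both resolve to the DayOfMonth expression, while the short-lived snake_case names (day_in_year, day_of_month, week_of_year) disappear in favour of dayofyear, dayofmonth and weekofyear. A hedged SQL-level sketch of the resulting names, assuming a plain SQLContext in which these built-ins are registered:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object DateSqlNameDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sql-demo").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)

    // Both the legacy "day" alias and the new "dayofmonth" name hit DayOfMonth.
    sqlContext.sql(
      """SELECT day(CAST('2015-04-08' AS DATE))        AS d,
        |       dayofmonth(CAST('2015-04-08' AS DATE)) AS dom,
        |       dayofyear(CAST('2015-04-08' AS DATE))  AS doy,
        |       weekofyear(CAST('2015-04-08' AS DATE)) AS woy""".stripMargin).show()

    sc.stop()
  }
}
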
@@ -116,14 +116,12 @@ case class Second(child: Expression) extends UnaryExpression with ImplicitCastIn
}
}

case class DayInYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
case class DayOfYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

override def dataType: DataType = IntegerType

override def prettyName: String = "day_in_year"

override protected def nullSafeEval(date: Any): Any = {
DateTimeUtils.getDayInYear(date.asInstanceOf[Int])
}
@@ -149,7 +147,7 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu

override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, (c) =>
defineCodeGen(ctx, ev, c =>
s"""$dtu.getYear($c)"""
)
}
@@ -191,7 +189,7 @@ case class Month(child: Expression) extends UnaryExpression with ImplicitCastInp
}
}

case class Day(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
case class DayOfMonth(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

@@ -215,8 +213,6 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa

override def dataType: DataType = IntegerType

override def prettyName: String = "week_of_year"

override protected def nullSafeEval(date: Any): Any = {
val c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
c.setFirstDayOfWeek(Calendar.MONDAY)
@@ -225,7 +221,7 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa
c.get(Calendar.WEEK_OF_YEAR)
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String =
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
nullSafeCodeGen(ctx, ev, (time) => {
val cal = classOf[Calendar].getName
val c = ctx.freshName("cal")
@@ -237,6 +233,7 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa
${ev.primitive} = $c.get($cal.WEEK_OF_YEAR);
"""
})
}
}

case class DateFormatClass(left: Expression, right: Expression) extends BinaryExpression
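
Note that the prettyName overrides ("day_in_year" and "week_of_year") above are deleted outright rather than updated. A standalone sketch of the presumed reason: Catalyst's Expression.prettyName is assumed to default to the lowercased class name, so the renamed DayOfYear and WeekOfYear classes already report the desired names (DemoExpression below is a stand-in, not Spark's actual Expression).

// Stand-in base class; the default prettyName mimics what the Catalyst expressions are
// assumed to inherit, which is why the explicit overrides became redundant after the rename.
abstract class DemoExpression {
  def prettyName: String = getClass.getSimpleName.toLowerCase
}

case class DayOfYear() extends DemoExpression   // formerly DayInYear ("day_in_year")
case class WeekOfYear() extends DemoExpression  // override "week_of_year" no longer needed

object PrettyNameDemo extends App {
  println(DayOfYear().prettyName)   // prints "dayofyear"
  println(WeekOfYear().prettyName)  // prints "weekofyear"
}
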
@@ -31,14 +31,14 @@ import org.apache.spark.unsafe.types.UTF8String
* precision.
*/
object DateTimeUtils {
final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L

// see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian
final val JULIAN_DAY_OF_EPOCH = 2440587 // and .5
final val SECONDS_PER_DAY = 60 * 60 * 24L
final val MICROS_PER_SECOND = 1000L * 1000L
final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L

final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L

// number of days in 400 years
final val daysIn400Years: Int = 146097
// number of days between 1.1.1970 and 1.1.2001
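
The reordering above moves MILLIS_PER_DAY below the SECONDS_PER_DAY definition it is derived from; reproduced standalone with the resulting values:

object DateTimeConstantsDemo extends App {
  final val SECONDS_PER_DAY = 60 * 60 * 24L            // 86400
  final val MILLIS_PER_DAY  = SECONDS_PER_DAY * 1000L  // 86400000
  println((SECONDS_PER_DAY, MILLIS_PER_DAY))
}
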
@@ -19,27 +19,27 @@ package org.apache.spark.sql.catalyst.expressions

import java.sql.{Timestamp, Date}
import java.text.SimpleDateFormat
import java.util.{TimeZone, Calendar}
import java.util.Calendar

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{StringType, TimestampType, DateType}

class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
val sdfDate = new SimpleDateFormat("yyyy-MM-dd")
val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime)

test("Day in Year") {
test("DayOfYear") {
val sdfDay = new SimpleDateFormat("D")
(2002 to 2004).foreach { y =>
(0 to 11).foreach { m =>
(0 to 5).foreach { i =>
val c = Calendar.getInstance()
c.set(y, m, 28, 0, 0, 0)
c.add(Calendar.DATE, i)
checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
sdfDay.format(c.getTime).toInt)
}
}
@@ -51,7 +51,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val c = Calendar.getInstance()
c.set(y, m, 28, 0, 0, 0)
c.add(Calendar.DATE, i)
checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
sdfDay.format(c.getTime).toInt)
}
}
@@ -63,7 +63,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val c = Calendar.getInstance()
c.set(y, m, 28, 0, 0, 0)
c.add(Calendar.DATE, i)
checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
sdfDay.format(c.getTime).toInt)
}
}
@@ -75,7 +75,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val c = Calendar.getInstance()
c.set(y, m, 28, 0, 0, 0)
c.add(Calendar.DATE, 1)
checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
sdfDay.format(c.getTime).toInt)
}
}
@@ -87,7 +87,7 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val c = Calendar.getInstance()
c.set(y, m, 28, 0, 0, 0)
c.add(Calendar.DATE, 1)
checkEvaluation(DayInYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
checkEvaluation(DayOfYear(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
sdfDay.format(c.getTime).toInt)
}
}
@@ -163,19 +163,19 @@ class DateFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}

test("Day") {
checkEvaluation(Day(Cast(Literal("2000-02-29"), DateType)), 29)
checkEvaluation(Day(Literal.create(null, DateType)), null)
checkEvaluation(Day(Cast(Literal(d), DateType)), 8)
checkEvaluation(Day(Cast(Literal(sdfDate.format(d)), DateType)), 8)
checkEvaluation(Day(Cast(Literal(ts), DateType)), 8)
test("Day / DayOfMonth") {
checkEvaluation(DayOfMonth(Cast(Literal("2000-02-29"), DateType)), 29)
checkEvaluation(DayOfMonth(Literal.create(null, DateType)), null)
checkEvaluation(DayOfMonth(Cast(Literal(d), DateType)), 8)
checkEvaluation(DayOfMonth(Cast(Literal(sdfDate.format(d)), DateType)), 8)
checkEvaluation(DayOfMonth(Cast(Literal(ts), DateType)), 8)

(1999 to 2000).foreach { y =>
val c = Calendar.getInstance()
c.set(y, 0, 1, 0, 0, 0)
(0 to 365).foreach { d =>
c.add(Calendar.DATE, 1)
checkEvaluation(Day(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
checkEvaluation(DayOfMonth(Cast(Literal(new Date(c.getTimeInMillis)), DateType)),
c.get(Calendar.DAY_OF_MONTH))
}
}
(Diffs for the remaining three changed files are not shown here.)
