- Author: Ben Du
- Date: 2020-06-17
- Title: Date Functions in Spark
- Slug: spark-dataframe-func-date
- Category: Computer Science
- Tags: programming, Scala, Spark, DataFrame, date, Spark SQL, functions

API reference (Spark 2.1.1 docs): [org.apache.spark.sql.functions](https://spark.apache.org/docs/2.1.1/api/java/index.html?org/apache/spark/sql/functions.html)

## Comment

It seems that these functions work on strings of the format "yyyy-MM-dd".
TODO: check whether they also work on other formats!

In [1]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.3.1
org.apache.spark spark-sql_2.11 2.3.1

In [2]:
import org.apache.spark.sql.functions._


import org.apache.spark.sql.functions._


In [7]:
import org.apache.spark.sql.SparkSession

// Local-mode Spark session used by the examples below.
val spark = SparkSession
    .builder()
    .master("local")
    // App name fixed: it previously read "data_add example" (typo for date_add).
    .appName("date_add example")
    // Placeholder key/value kept as-is; nothing in this notebook reads it.
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
import spark.implicits._

org.apache.spark.sql.SparkSession$implicits$@716fc028

## date_add

In [8]:
// Sample data: two rows of date strings (yyyy-MM-dd) in columns d1 and d2.
val df = List(
    ("2017-01-01", "2017-01-07"),
    ("2017-02-01", "2019-02-10")
).toDF("d1", "d2")
df.show()

+----------+----------+
|        d1|        d2|
+----------+----------+
|2017-01-01|2017-01-07|
|2017-02-01|2019-02-10|
+----------+----------+



null

In [9]:
import org.apache.spark.sql.functions._

// d3 = d1 minus 30 days, d4 = d1 plus 30 days;
// check tells whether d2 falls within the range [d3, d4].
val df1 = df
    .withColumn("d3", date_sub(col("d1"), 30))
    .withColumn("d4", date_add(col("d1"), 30))
    .withColumn("check", col("d2").between(col("d3"), col("d4")))
df1.show()
df1.schema

+----------+----------+----------+----------+-----+
|        d1|        d2|        d3|        d4|check|
+----------+----------+----------+----------+-----+
|2017-01-01|2017-01-07|2016-12-02|2017-01-31| true|
|2017-02-01|2019-02-10|2017-01-02|2017-03-03|false|
+----------+----------+----------+----------+-----+



[[StructField(d1,StringType,true), StructField(d2,StringType,true), StructField(d3,DateType,true), StructField(d4,DateType,true), StructField(check,BooleanType,true)]]

## date_sub

See the example in the `date_add` section above, which also demonstrates `date_sub`.

## datediff

In [15]:
// diff = number of days from d1 to d2 (end date is the first argument).
val df2 = df.withColumn(
    "diff",
    datediff(col("d2"), col("d1"))
)
df2.show()
df2.schema

+----------+----------+----+
|        d1|        d2|diff|
+----------+----------+----+
|2017-01-01|2017-01-07|   6|
|2017-02-01|2019-02-10| 739|
+----------+----------+----+



[[StructField(d1,StringType,true), StructField(d2,StringType,true), StructField(diff,IntegerType,true)]]

## current_date

In [17]:
// Append the current date as a new column named "current".
val df3 = df.withColumn(
    "current",
    current_date()
)
df3.show()
df3.schema

+----------+----------+----------+
|        d1|        d2|   current|
+----------+----------+----------+
|2017-01-01|2017-01-07|2018-05-02|
|2017-02-01|2019-02-10|2018-05-02|
+----------+----------+----------+



[[StructField(d1,StringType,true), StructField(d2,StringType,true), StructField(current,DateType,false)]]

## dayofmonth

In [21]:
// Extract the day-of-month component of d2 into day_of_d2.
val df4 = df.withColumn(
    "day_of_d2",
    dayofmonth(col("d2"))
)
df4.show()
df4.schema

+----------+----------+---------+
|        d1|        d2|day_of_d2|
+----------+----------+---------+
|2017-01-01|2017-01-07|        7|
|2017-02-01|2019-02-10|       10|
+----------+----------+---------+



[[StructField(d1,StringType,true), StructField(d2,StringType,true), StructField(day_of_d2,IntegerType,true)]]

## dayofyear

In [22]:
// Day-of-year for each of the two date columns.
val df5 = df
    .withColumn("day_of_year_d1", dayofyear(col("d1")))
    .withColumn("day_of_year_d2", dayofyear(col("d2")))
df5.show()
df5.schema

+----------+----------+--------------+--------------+
|        d1|        d2|day_of_year_d1|day_of_year_d2|
+----------+----------+--------------+--------------+
|2017-01-01|2017-01-07|             1|             7|
|2017-02-01|2019-02-10|            32|            41|
+----------+----------+--------------+--------------+



[[StructField(d1,StringType,true), StructField(d2,StringType,true), StructField(day_of_year_d1,IntegerType,true), StructField(day_of_year_d2,IntegerType,true)]]

## date_format

In [25]:
// Render d1 as a string using the dd/MM/yyyy pattern.
val df6 = df.withColumn(
    "format_d1",
    date_format(col("d1"), "dd/MM/yyyy")
)
df6.show()
df6.schema

+----------+----------+----------+
|        d1|        d2| format_d1|
+----------+----------+----------+
|2017-01-01|2017-01-07|01/01/2017|
|2017-02-01|2019-02-10|01/02/2017|
+----------+----------+----------+



[[StructField(d1,StringType,true), StructField(d2,StringType,true), StructField(format_d1,StringType,true)]]