In [12]:
import numpy as np
import pandas as pd
import datetime
from pyspark.sql import SparkSession

# Obtain the Spark session that every cell below uses via `spark`.
spark = (
    SparkSession.builder
    .appName('my_first_app_name')
    .getOrCreate()
)

In [26]:
# For plain-Python and pandas time helpers, see "Time_utils.py".
# 1. Get the current date
from pyspark.sql.functions import current_date

temp = spark.range(3).withColumn('date', current_date())
temp.show()

# Collect the first row's date once and reuse it: take() is a Spark action,
# so calling it twice inside the print would run the same query twice.
first_date = temp.select('id', 'date').take(1)[0][1]
print(first_date, type(first_date))  # the collected value is a datetime.date

# Standard-library equivalents, for comparison with the Spark result.
temp2 = datetime.datetime.today()
print(temp2, type(temp2), temp2.day, type(temp2.day))

temp3 = datetime.date.today()
print(temp3, type(temp3), temp3.day, type(temp3.day))

+---+----------+
| id|      date|
+---+----------+
|  0|2020-02-25|
|  1|2020-02-25|
|  2|2020-02-25|
+---+----------+

2020-02-25 <class 'datetime.date'>
2020-02-25 18:17:14.903051 <class 'datetime.datetime'> 25 <class 'int'>
2020-02-25 <class 'datetime.date'> 25 <class 'int'>


In [28]:
# 2. Reformat a date
from pyspark.sql.functions import date_format

sdf = spark.createDataFrame([('2015-04-08',)], ['a'])
# Render column 'a' using the 'yyyy/MM/dd' pattern.
slash_date = date_format('a', 'yyyy/MM/dd').alias('date')
sdf.select(slash_date).show()

+----------+
|      date|
+----------+
|2015/04/08|
+----------+



In [30]:
# 3. Convert strings to dates
from pyspark.sql.functions import to_date, to_timestamp

# All three examples read the same input, so build the one-row frame once
# instead of re-creating an identical DataFrame before each conversion.
sdf = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])

# 1. String -> date (only the calendar date is kept)
sdf.select(to_date(sdf.t).alias('date')).show()

# 2. String -> timestamp (date together with the time of day)
sdf.select(to_timestamp(sdf.t).alias('dt')).show()

# The input pattern can also be passed explicitly
sdf.select(to_timestamp(sdf.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).show()

+----------+
|      date|
+----------+
|1997-02-28|
+----------+

+-------------------+
|                 dt|
+-------------------+
|1997-02-28 10:30:00|
+-------------------+

+-------------------+
|                 dt|
+-------------------+
|1997-02-28 10:30:00|
+-------------------+



In [31]:
# 4. Extract the year, month, and day from a date
from pyspark.sql.functions import year, month, dayofmonth

sdf = spark.createDataFrame([('2015-04-08',)], ['a'])
# One output column per calendar component of column 'a'.
date_parts = [
    year('a').alias('year'),
    month('a').alias('month'),
    dayofmonth('a').alias('day'),
]
sdf.select(*date_parts).show()

+----+-----+---+
|year|month|day|
+----+-----+---+
|2015|    4|  8|
+----+-----+---+



In [32]:
# 5. Extract the hour, minute, and second
from pyspark.sql.functions import hour, minute, second

df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
# One output column per time-of-day component of column 'a'.
time_parts = [
    hour('a').alias('hour'),
    minute('a').alias('minute'),
    second('a').alias('second'),
]
df.select(*time_parts).show()

+----+------+------+
|hour|minute|second|
+----+------+------+
|  13|     8|    15|
+----+------+------+



In [33]:
# 6. Find the quarter a date falls in
from pyspark.sql.functions import quarter

sdf = spark.createDataFrame([('2015-04-08',)], ['a'])
quarter_col = quarter('a').alias('quarter')
sdf.select(quarter_col).show()

+-------+
|quarter|
+-------+
|      2|
+-------+



In [34]:
# 7. Add or subtract days
from pyspark.sql.functions import date_add, date_sub

sdf = spark.createDataFrame([('2015-04-08',)], ['d'])
# Shift column 'd' one day forward and one day back.
day_after = date_add(sdf.d, 1).alias('d-add')
day_before = date_sub(sdf.d, 1).alias('d-sub')
sdf.select(day_after, day_before).show()

+----------+----------+
|     d-add|     d-sub|
+----------+----------+
|2015-04-09|2015-04-07|
+----------+----------+



In [35]:
# 8. Add or subtract months
from pyspark.sql.functions import add_months

df = spark.createDataFrame([('2015-04-08',)], ['d'])
# Shift column 'd' forward by one month.
one_month_later = add_months(df.d, 1).alias('d')
df.select(one_month_later).show()

+----------+
|         d|
+----------+
|2015-05-08|
+----------+



In [37]:
# 9. Difference between two dates, in days and in months
from pyspark.sql.functions import datediff, months_between

# 1. Difference in days (d2 relative to d1)
sdf = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2'])
day_gap = datediff(sdf.d2, sdf.d1).alias('diff')
sdf.select(day_gap).show()

# 2. Difference in months (t relative to d; the result is fractional)
sdf = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
month_gap = months_between(sdf.t, sdf.d).alias('months')
sdf.select(month_gap).show()

+----+
|diff|
+----+
|  32|
+----+

+----------+
|    months|
+----------+
|3.94959677|
+----------+



In [38]:
# 10. Find the next date that falls on a given weekday
from pyspark.sql.functions import next_day

# Weekday names: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
sdf = spark.createDataFrame([('2015-07-27',)], ['d'])
following_sunday = next_day(sdf.d, 'Sun').alias('date')
sdf.select(following_sunday).show()

+----------+
|      date|
+----------+
|2015-08-02|
+----------+



In [39]:
# 11. Last day of the month a date belongs to
from pyspark.sql.functions import last_day

sdf = spark.createDataFrame([('1997-02-10',)], ['d'])
month_end = last_day(sdf.d).alias('date')
sdf.select(month_end).show()

+----------+
|      date|
+----------+
|1997-02-28|
+----------+

