In [1]:
# Sample employee records, one tuple per row: (id, name, dept, salary, date).
# The date is kept as a string here; note that some time-of-day fields are
# not zero-padded (e.g. "5:12:3").
emp = [
    (10, "AAA", "dept1", 1000, "2019-02-01 15:12:13"),
    (8, "BBB", "dept1", 1100, "2018-04-01 5:12:3"),
    (6, "CCC", "dept1", 3000, "2017-06-05 1:2:13"),
    (13, "DDD", "dept1", 1500, "2019-08-10 10:52:53"),
    (2, "EEE", "dept2", 8000, "2016-01-11 5:52:43"),
    (1, "FFF", "dept2", 7200, "2015-04-14 19:32:33"),
    (11, "GGG", "dept3", 7100, "2019-02-21 15:42:43"),
    (5, "HHH", "dept3", 3700, "2016-09-25 15:32:33"),
    (7, "III", "dept3", 4500, "2017-10-15 15:22:23"),
    (9, "JJJ", "dept5", 3400, "2018-12-17 15:14:17"),
    (4, "KKK", "dept5", 3400, "2016-09-11 05:52:43"),
    (3, "LLL", "dept5", 3400, "2016-09-11 00:00:00"),
    (12, "MMM", "dept3", 7100, "2019-02-28 15:42:43"),
]

In [2]:
from pyspark.sql.functions import *

In [3]:
# Build a DataFrame from the in-memory rows. Only column names are supplied,
# so the `date` column starts out as a plain string (see the printed schema).
emp_df = spark.createDataFrame(
    data=emp,
    schema=["id", "name", "dept", "salary", "date"],
)

emp_df.printSchema()


root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- date: string (nullable = true)



In [4]:
# Parse the string `date` column into a timestamp; reusing the column name
# makes withColumn replace the original string column.
# The sample rows contain time fields without zero padding (e.g. "5:12:3"),
# so single-letter H/m/s are used: they accept one- or two-digit values,
# whereas HH:mm:ss requires exactly two digits under the default Spark 3
# parser and only matches such rows with the legacy time parser policy.
correctedType_df = emp_df.withColumn('date', to_timestamp('date', 'yyyy-MM-dd H:m:s'))



In [5]:
# Print the schema to confirm that `date` is now a timestamp column.
correctedType_df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- date: timestamp (nullable = true)



In [7]:
# Switch Spark's datetime parsing to the LEGACY time parser policy.
# NOTE(review): presumably needed because some input strings have
# non-zero-padded time fields (e.g. "2018-04-01 5:12:3") — confirm.
# NOTE(review): this is set after the timestamp column was defined; it
# presumably still applies because nothing is evaluated until an action
# such as show() runs — confirm, and consider moving it to a config cell
# at the top of the notebook.
spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")

In [8]:
# Display the rows sorted by timestamp in ascending order.
correctedType_df.sort(col('date').asc()).show()

+---+----+-----+------+-------------------+
| id|name| dept|salary|               date|
+---+----+-----+------+-------------------+
|  1| FFF|dept2|  7200|2015-04-14 19:32:33|
|  2| EEE|dept2|  8000|2016-01-11 05:52:43|
|  3| LLL|dept5|  3400|2016-09-11 00:00:00|
|  4| KKK|dept5|  3400|2016-09-11 05:52:43|
|  5| HHH|dept3|  3700|2016-09-25 15:32:33|
|  6| CCC|dept1|  3000|2017-06-05 01:02:13|
|  7| III|dept3|  4500|2017-10-15 15:22:23|
|  8| BBB|dept1|  1100|2018-04-01 05:12:03|
|  9| JJJ|dept5|  3400|2018-12-17 15:14:17|
| 10| AAA|dept1|  1000|2019-02-01 15:12:13|
| 11| GGG|dept3|  7100|2019-02-21 15:42:43|
| 12| MMM|dept3|  7100|2019-02-28 15:42:43|
| 13| DDD|dept1|  1500|2019-08-10 10:52:53|
+---+----+-----+------+-------------------+



In [9]:
# Keep the date-ascending ordering as a named DataFrame for the cells below.
orderedDate_df = correctedType_df.sort(col('date').asc())

In [10]:
# Add a `datediff` column holding the number of days from each row's date to
# the current date, then display the result. The values depend on the day
# the cell is run.
(
    orderedDate_df
    .withColumn('datediff', datediff(current_date(), col('date')))
    .show()
)

+---+----+-----+------+-------------------+--------+
| id|name| dept|salary|               date|datediff|
+---+----+-----+------+-------------------+--------+
|  1| FFF|dept2|  7200|2015-04-14 19:32:33|    2272|
|  2| EEE|dept2|  8000|2016-01-11 05:52:43|    2000|
|  3| LLL|dept5|  3400|2016-09-11 00:00:00|    1756|
|  4| KKK|dept5|  3400|2016-09-11 05:52:43|    1756|
|  5| HHH|dept3|  3700|2016-09-25 15:32:33|    1742|
|  6| CCC|dept1|  3000|2017-06-05 01:02:13|    1489|
|  7| III|dept3|  4500|2017-10-15 15:22:23|    1357|
|  8| BBB|dept1|  1100|2018-04-01 05:12:03|    1189|
|  9| JJJ|dept5|  3400|2018-12-17 15:14:17|     929|
| 10| AAA|dept1|  1000|2019-02-01 15:12:13|     883|
| 11| GGG|dept3|  7100|2019-02-21 15:42:43|     863|
| 12| MMM|dept3|  7100|2019-02-28 15:42:43|     856|
| 13| DDD|dept1|  1500|2019-08-10 10:52:53|     693|
+---+----+-----+------+-------------------+--------+



In [11]:
# Derive two calendar features from the timestamp — the day-of-week number
# and the week-of-year number — and display them alongside the original rows.
(
    orderedDate_df
    .withColumn('day_of_week', dayofweek(col('date')))
    .withColumn('week_of_year', weekofyear(col('date')))
    .show()
)

+---+----+-----+------+-------------------+-----------+------------+
| id|name| dept|salary|               date|day_of_week|week_of_year|
+---+----+-----+------+-------------------+-----------+------------+
|  1| FFF|dept2|  7200|2015-04-14 19:32:33|          3|          16|
|  2| EEE|dept2|  8000|2016-01-11 05:52:43|          2|           2|
|  3| LLL|dept5|  3400|2016-09-11 00:00:00|          1|          36|
|  4| KKK|dept5|  3400|2016-09-11 05:52:43|          1|          36|
|  5| HHH|dept3|  3700|2016-09-25 15:32:33|          1|          38|
|  6| CCC|dept1|  3000|2017-06-05 01:02:13|          2|          23|
|  7| III|dept3|  4500|2017-10-15 15:22:23|          1|          41|
|  8| BBB|dept1|  1100|2018-04-01 05:12:03|          1|          13|
|  9| JJJ|dept5|  3400|2018-12-17 15:14:17|          2|          51|
| 10| AAA|dept1|  1000|2019-02-01 15:12:13|          6|           5|
| 11| GGG|dept3|  7100|2019-02-21 15:42:43|          5|           8|
| 12| MMM|dept3|  7100|2019-02-28 