In [13]:
# Sample employee records, one per line, as comma-separated fields:
# id,name,dept,salary,date  (the date is a 'yyyy-MM-dd HH:mm:ss' string).
_EMP_ROWS = """\
10,AAA,dept1,1000,2019-02-01 15:12:13
8,BBB,dept1,1100,2018-04-01 05:12:03
6,CCC,dept1,3000,2017-06-05 01:02:13
13,DDD,dept1,1500,2019-08-10 10:52:53
2,EEE,dept2,8000,2016-01-11 05:52:43
1,FFF,dept2,7200,2015-04-14 19:32:33
11,GGG,dept3,7100,2019-02-21 15:42:43
5,HHH,dept3,3700,2016-09-25 15:32:33
7,III,dept3,4500,2017-10-15 15:22:23
9,JJJ,dept5,3400,2018-12-17 15:14:17
4,KKK,dept5,3400,2016-09-11 05:52:43
3,LLL,dept5,3400,2016-09-11 00:00:00
12,MMM,dept3,7100,2019-02-28 15:42:43
"""

# Turn each line into an (id, name, dept, salary, date) tuple. The id and
# salary fields become ints; the date stays a plain string at this stage.
emp = [
    (int(emp_id), name, dept, int(salary), date_str)
    for emp_id, name, dept, salary, date_str in (
        line.split(",") for line in _EMP_ROWS.splitlines()
    )
]

In [14]:
import pyspark.sql.functions as F

In [15]:
# Build a DataFrame from the in-memory rows, naming the five columns
# explicitly; the column types are left for Spark to infer.
emp_df = spark.createDataFrame(
    data=emp,
    schema=[
        'id',
        'name',
        'dept',
        'salary',
        'date',
    ],
)
# Show the inferred schema.
emp_df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- date: string (nullable = true)



In [16]:
# Replace the 'date' column with the result of parsing it as a timestamp
# using the 'yyyy-MM-dd HH:mm:ss' pattern.
correctedType_df = emp_df.withColumn(
    'date',
    F.to_timestamp(F.col('date'), format='yyyy-MM-dd HH:mm:ss'),
)

In [17]:
# Print the schema of correctedType_df (the frame whose 'date' column was
# passed through F.to_timestamp).
correctedType_df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- salary: long (nullable = true)
 |-- date: timestamp (nullable = true)



In [18]:
# Display the rows sorted by 'date', ascending.
correctedType_df.sort(F.col('date').asc()).show()

+---+----+-----+------+-------------------+
| id|name| dept|salary|               date|
+---+----+-----+------+-------------------+
|  1| FFF|dept2|  7200|2015-04-14 19:32:33|
|  2| EEE|dept2|  8000|2016-01-11 05:52:43|
|  3| LLL|dept5|  3400|2016-09-11 00:00:00|
|  4| KKK|dept5|  3400|2016-09-11 05:52:43|
|  5| HHH|dept3|  3700|2016-09-25 15:32:33|
|  6| CCC|dept1|  3000|2017-06-05 01:02:13|
|  7| III|dept3|  4500|2017-10-15 15:22:23|
|  8| BBB|dept1|  1100|2018-04-01 05:12:03|
|  9| JJJ|dept5|  3400|2018-12-17 15:14:17|
| 10| AAA|dept1|  1000|2019-02-01 15:12:13|
| 11| GGG|dept3|  7100|2019-02-21 15:42:43|
| 12| MMM|dept3|  7100|2019-02-28 15:42:43|
| 13| DDD|dept1|  1500|2019-08-10 10:52:53|
+---+----+-----+------+-------------------+



                                                                                

In [19]:
# Keep a handle on the frame sorted by 'date' (ascending) for the cells below.
orderedDate_df = correctedType_df.sort('date', ascending=True)

In [20]:
# Add a 'datediff' column: the number of days from each row's 'date' to the
# current date at execution time, so the values change from run to run.
orderedDate_df.withColumn(
    'datediff',
    F.datediff(F.current_date(), 'date'),
).show()

+---+----+-----+------+-------------------+--------+
| id|name| dept|salary|               date|datediff|
+---+----+-----+------+-------------------+--------+
|  1| FFF|dept2|  7200|2015-04-14 19:32:33|    2465|
|  2| EEE|dept2|  8000|2016-01-11 05:52:43|    2193|
|  3| LLL|dept5|  3400|2016-09-11 00:00:00|    1949|
|  4| KKK|dept5|  3400|2016-09-11 05:52:43|    1949|
|  5| HHH|dept3|  3700|2016-09-25 15:32:33|    1935|
|  6| CCC|dept1|  3000|2017-06-05 01:02:13|    1682|
|  7| III|dept3|  4500|2017-10-15 15:22:23|    1550|
|  8| BBB|dept1|  1100|2018-04-01 05:12:03|    1382|
|  9| JJJ|dept5|  3400|2018-12-17 15:14:17|    1122|
| 10| AAA|dept1|  1000|2019-02-01 15:12:13|    1076|
| 11| GGG|dept3|  7100|2019-02-21 15:42:43|    1056|
| 12| MMM|dept3|  7100|2019-02-28 15:42:43|    1049|
| 13| DDD|dept1|  1500|2019-08-10 10:52:53|     886|
+---+----+-----+------+-------------------+--------+



In [21]:
# Display every existing column plus two values derived from 'date':
# its day of the week and its week of the year.
orderedDate_df.select(
    '*',
    F.dayofweek('date').alias('day_of_week'),
    F.weekofyear('date').alias('week_of_year'),
).show()

+---+----+-----+------+-------------------+-----------+------------+
| id|name| dept|salary|               date|day_of_week|week_of_year|
+---+----+-----+------+-------------------+-----------+------------+
|  1| FFF|dept2|  7200|2015-04-14 19:32:33|          3|          16|
|  2| EEE|dept2|  8000|2016-01-11 05:52:43|          2|           2|
|  3| LLL|dept5|  3400|2016-09-11 00:00:00|          1|          36|
|  4| KKK|dept5|  3400|2016-09-11 05:52:43|          1|          36|
|  5| HHH|dept3|  3700|2016-09-25 15:32:33|          1|          38|
|  6| CCC|dept1|  3000|2017-06-05 01:02:13|          2|          23|
|  7| III|dept3|  4500|2017-10-15 15:22:23|          1|          41|
|  8| BBB|dept1|  1100|2018-04-01 05:12:03|          1|          13|
|  9| JJJ|dept5|  3400|2018-12-17 15:14:17|          2|          51|
| 10| AAA|dept1|  1000|2019-02-01 15:12:13|          6|           5|
| 11| GGG|dept3|  7100|2019-02-21 15:42:43|          5|           8|
| 12| MMM|dept3|  7100|2019-02-28 