# PySpark Date and Timestamp functions
- ✅ curdate()

- ✅ current_timestamp()

- ✅ timestamp_diff()

- ✅ date_add()

In [0]:
# DDL-style schema string: the two date columns are deliberately loaded as
# plain strings (dd-MM-yyyy) so they can be converted to real dates later.
schema = (
    "patient_id int, "
    "patient_name string, "
    "admission_date string, "
    "discharge_date string"
)

# One tuple per patient: (id, name, admission date, discharge date).
data = [
    (501, "Anita Sharma", "10-03-2023", "14-03-2023"),
    (502, "Rohit Mehra", "21-04-2023", "28-04-2023"),
    (503, "Emily Watson", "05-05-2023", "12-05-2023"),
    (504, "Kiran Patel", "18-07-2023", "23-07-2023"),
    (505, "Michael Clark", "02-10-2023", "10-10-2023"),
]

# `spark` is the SparkSession provided by the notebook runtime.
df = spark.createDataFrame(data=data, schema=schema)

# Show full cell contents (no truncation) and then the inferred column types.
df.show(truncate=False)
df.printSchema()


+----------+-------------+--------------+--------------+
|patient_id|patient_name |admission_date|discharge_date|
+----------+-------------+--------------+--------------+
|501       |Anita Sharma |10-03-2023    |14-03-2023    |
|502       |Rohit Mehra  |21-04-2023    |28-04-2023    |
|503       |Emily Watson |05-05-2023    |12-05-2023    |
|504       |Kiran Patel  |18-07-2023    |23-07-2023    |
|505       |Michael Clark|02-10-2023    |10-10-2023    |
+----------+-------------+--------------+--------------+

root
 |-- patient_id: integer (nullable = true)
 |-- patient_name: string (nullable = true)
 |-- admission_date: string (nullable = true)
 |-- discharge_date: string (nullable = true)



# Converting String to Date

In [0]:
from pyspark.sql.functions import curdate, current_timestamp, timestamp_diff,to_date

# Parse both dd-MM-yyyy string columns into DateType, replacing each column
# under its original name so the column order is unchanged.
# NOTE(review): `df` is overwritten in place, so this cell is intended to run
# once on the freshly created string-typed frame; confirm behaviour on re-run.
for date_col in ("admission_date", "discharge_date"):
    df = df.withColumn(date_col, to_date(date_col, "dd-MM-yyyy"))

# Verify that both columns are now reported as `date`.
df.printSchema()


root
 |-- patient_id: integer (nullable = true)
 |-- patient_name: string (nullable = true)
 |-- admission_date: date (nullable = true)
 |-- discharge_date: date (nullable = true)



### curdate() and current_timestamp()

✔ curdate()

Returns the current system date.

✔ current_timestamp()

Returns the current date and time as a timestamp.

In [0]:
# Stamp every row with the current date and the current timestamp.
# DataFrames are lazily evaluated, so these values are recomputed each time
# an action runs on `df` (later cell outputs show a later timestamp).
df = (
    df
    .withColumn("last_updated_date", curdate())
    .withColumn("last_updated_timestamp", current_timestamp())
)

df.display()


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T07:54:44.092Z
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T07:54:44.092Z
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T07:54:44.092Z
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T07:54:44.092Z
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T07:54:44.092Z


### Calculate Date Differences using timestamp_diff
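Syntax: `timestamp_diff(unit, start, end)` — returns the difference `end - start` expressed in whole `unit`s (any fractional part is truncated). Units used in this notebook's examples and other common choices: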

- YEAR
- MONTH
- DAY
- HOUR
- MINUTE
- SECOND
- MILLISECOND
- MICROSECOND

In [0]:
# Whole days elapsed between discharge and the last-updated date.
# The result is only displayed; `df` itself is not reassigned.
df.withColumn(
    "days_since_discharge",
    timestamp_diff(unit="DAY", start="discharge_date", end="last_updated_date"),
).display()


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp,days_since_discharge
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T08:05:26.853Z,1001
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T08:05:26.853Z,956
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T08:05:26.853Z,942
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T08:05:26.853Z,870
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T08:05:26.853Z,791


In [0]:
# Same discharge -> last-updated gap, rendered once per unit.
# Each iteration displays its own table; `df` is never reassigned.
for column_name, unit in (
    ("month_since_discharge", "MONTH"),
    ("year_since_discharge", "YEAR"),
    ("minute_since_discharge", "MINUTE"),
):
    df.withColumn(
        column_name,
        timestamp_diff(unit, "discharge_date", "last_updated_date"),
    ).display()


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp,month_since_discharge
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T08:05:58.781Z,32
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T08:05:58.781Z,31
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T08:05:58.781Z,30
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T08:05:58.781Z,28
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T08:05:58.781Z,25


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp,year_since_discharge
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T08:05:59.086Z,2
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T08:05:59.086Z,2
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T08:05:59.086Z,2
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T08:05:59.086Z,2
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T08:05:59.086Z,2


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp,minute_since_discharge
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T08:05:59.455Z,1441440
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T08:05:59.455Z,1376640
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T08:05:59.455Z,1356480
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T08:05:59.455Z,1252800
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T08:05:59.455Z,1139040



### Adding or Subtracting Days using date_add

- Syntax: `date_add(start_date, days)`

- Positive `days` → moves the date forward

- Negative `days` → moves the date backward

In [0]:
from pyspark.sql.functions import date_add

# Shift the discharge date ten days forward, then ten days backward.
# A negative day count makes date_add move the date into the past.
for column_name, day_offset in (
    ("discharge_date_+10", 10),
    ("discharge_date_-10", -10),
):
    df.withColumn(column_name, date_add("discharge_date", day_offset)).display()


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp,discharge_date_+10
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T08:19:29.139Z,2023-03-24
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T08:19:29.139Z,2023-05-08
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T08:19:29.139Z,2023-05-22
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T08:19:29.139Z,2023-08-02
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T08:19:29.139Z,2023-10-20


patient_id,patient_name,admission_date,discharge_date,last_updated_date,last_updated_timestamp,discharge_date_-10
501,Anita Sharma,2023-03-10,2023-03-14,2025-12-09,2025-12-09T08:19:29.537Z,2023-03-04
502,Rohit Mehra,2023-04-21,2023-04-28,2025-12-09,2025-12-09T08:19:29.537Z,2023-04-18
503,Emily Watson,2023-05-05,2023-05-12,2025-12-09,2025-12-09T08:19:29.537Z,2023-05-02
504,Kiran Patel,2023-07-18,2023-07-23,2025-12-09,2025-12-09T08:19:29.537Z,2023-07-13
505,Michael Clark,2023-10-02,2023-10-10,2025-12-09,2025-12-09T08:19:29.537Z,2023-09-30


## Thanks for Watching

## Like
## Subscribe
## Share