
# PySpark Date & Timestamp Functions in Databricks

In [0]:
# Column layout as a Spark DDL string. Both date columns start out as plain
# strings (dd-MM-yyyy) and are parsed into real date types in later cells.
schema = (
    "order_id int, "
    "customer_name string, "
    "order_date string, "
    "delivery_date string"
)

# One tuple per order: (order_id, customer_name, order_date, delivery_date).
data = [
    (101, "John Doe", "12-05-2023", "16-05-2023"),
    (102, "Maria Green", "25-07-2023", "29-07-2023"),
    (103, "Ramit Kumar", "02-09-2023", "05-09-2023"),
    (104, "Sophia Lee", "18-11-2023", "23-11-2023"),
    (105, "David King", "01-01-2024", "07-01-2024"),
]

df = spark.createDataFrame(data, schema=schema)

# Print the declared column types, then every row without truncating values.
df.printSchema()
df.show(truncate=False)


# Converting Order Date to PySpark Date Type

In [0]:
from pyspark.sql.functions import to_date, to_timestamp, date_format

# Parse both dd-MM-yyyy strings into date columns in one chained expression.
# The original string columns are kept alongside the new *_converted ones.
df = (
    df
    .withColumn("order_date_converted", to_date("order_date", "dd-MM-yyyy"))
    .withColumn("delivery_date_converted", to_date("delivery_date", "dd-MM-yyyy"))
)

df.display()


# Converting Date Strings to Timestamps

In [0]:
# Parse the same dd-MM-yyyy strings again, this time into timestamp columns,
# using one chained expression instead of two separate reassignments.
df = (
    df
    .withColumn("order_timestamp", to_timestamp("order_date", "dd-MM-yyyy"))
    .withColumn("delivery_timestamp", to_timestamp("delivery_date", "dd-MM-yyyy"))
)

df.display()


## Extract Year-Month (yyyy-MM) for Order Date

In [0]:
# Render the parsed order date with the "yyyy-MM" pattern.
# The result is only displayed; `df` itself is not reassigned here.
(
    df
    .withColumn(
        "order_year_month",
        date_format("order_date_converted", "yyyy-MM"),
    )
    .display()
)


## Extract Month Name & Year (MMMM, yyyy)

In [0]:
# Render the parsed order date with the "MMMM, yyyy" pattern.
# The result is only displayed; `df` itself is not reassigned here.
(
    df
    .withColumn(
        "order_month_year",
        date_format("order_date_converted", "MMMM, yyyy"),
    )
    .display()
)


## Calculate Delivery Duration (in Days)

In [0]:
from pyspark.sql.functions import datediff

# datediff takes (end, start), so the delivery date goes first and the order
# date second. The result is only displayed; `df` is not reassigned here.
(
    df
    .withColumn(
        "delivery_days",
        datediff("delivery_date_converted", "order_date_converted"),
    )
    .display()
)



### ✅ How to create a meaningful Order DataFrame
### ✅ How to convert string dates into PySpark Date and Timestamp types
### ✅ How to extract year-month and month name with year using date_format()
### ✅ How to calculate delivery duration using datediff()

# Thanks for Watching
# Subscribe · Like · Share