In [0]:
from pyspark.sql import functions as F, types as T

In [0]:
# Date range for the dimension, given as "yyyy-MM-dd" strings.
start_date, end_date = "2024-01-01", "2025-12-31"

In [0]:
# Build the month-grain date dimension: one row per calendar month between
# start_date and end_date.
#   * trunc(<date>, "MM") snaps each bound to the first day of its month, so a
#     mid-month start_date (e.g. "2024-01-15") still yields real month starts
#     and the month containing end_date is always included.
#   * sequence() builds an array of dates stepping by one month (bounds
#     inclusive); explode() turns that array into one row per element.
month_starts = spark.sql(
    f"""
               select explode(
                   sequence(
                       trunc(to_date("{start_date}"), "MM"),
                       trunc(to_date("{end_date}"), "MM"),
                       interval 1 month
                   )
               ) as month_start_date
"""
)

# Derive the descriptive attributes from month_start_date. The final frame is
# kept under the name `df` because the write cell below reads it by that name.
df = (
    month_starts
    # Integer surrogate key in yyyyMMdd form, e.g. 20250101.
    .withColumn("date_key", F.date_format("month_start_date", "yyyyMMdd").cast("int"))
    .withColumn("year", F.year("month_start_date"))
    # Full and abbreviated month names, e.g. "January" / "Jan".
    .withColumn("month_name", F.date_format("month_start_date", "MMMM"))
    .withColumn("month_short_name", F.date_format("month_start_date", "MMM"))
    # Quarter label, e.g. "Q1".
    .withColumn("quarter", F.concat(F.lit("Q"), F.quarter("month_start_date")))
    # Year-qualified quarter label, e.g. "2025-Q1".
    .withColumn(
        "year_quarter",
        F.concat(F.col("year"), F.lit("-Q"), F.quarter("month_start_date")),
    )
)
display(df)

In [0]:
# Persist the date dimension as a Delta table. mode("overwrite") replaces the
# table's existing contents on every run; the mergeSchema option is passed
# through to the Delta writer unchanged.
dim_date_writer = (
    df.write.format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
)
dim_date_writer.saveAsTable("fmcg.gold.dim_date")