# Calendar and time dimension builder
## Automatically build out a time and calendar dimension via scripting

In [0]:
# Remove any existing copies of the two dimension tables; the cells below
# recreate both of them.
drop_calendar_sql = "DROP TABLE IF EXISTS neighbor.silver.calendar"
drop_time_sql = "DROP TABLE IF EXISTS neighbor.silver.time"

spark.sql(drop_calendar_sql)
spark.sql(drop_time_sql)

In [0]:
from pyspark.sql.functions import (
    col, expr, date_format, dayofmonth, dayofweek, dayofyear, weekofyear, month, year, quarter,
    last_day, trunc
)

# Inclusive bounds of the calendar dimension, as yyyy-MM-dd strings.
start_date = "2022-01-01"
end_date = "2024-12-31"

# Let Spark compute the gap in days between the bounds; the +1 makes the
# end date itself part of the generated range.
day_gap = spark.sql(f"SELECT datediff('{end_date}', '{start_date}' )").first()[0]
day_count = day_gap + 1

# One row per day: turn each generated id into an offset from start_date and
# keep only the resulting date column.
dates_df = spark.range(0, day_count).select(
    expr(f"date_add('{start_date}', cast(id as int))").alias("date")
)

day = col("date")

# Derived attributes, listed in the order they are attached to the frame.
# Order matters: "IsWeekend" is a SQL expression over the "DayOfWeek" column
# added just before it (dayofweek() numbers Sunday as 1 and Saturday as 7).
calendar_attributes = [
    ("CalendarDate", day),
    ("Day", dayofmonth(day)),
    ("DayOfWeek", dayofweek(day)),
    ("DayName", date_format(day, "EEEE")),
    ("IsWeekend", expr("DayOfWeek IN (1,7)")),
    ("DayOfYear", dayofyear(day)),
    ("WeekOfYear", weekofyear(day)),
    ("Month", month(day)),
    ("MonthName", date_format(day, "MMMM")),
    ("Quarter", quarter(day)),
    ("Year", year(day)),
    ("FirstDayOfMonth", trunc(day, "month")),
    ("LastDayOfMonth", last_day(day)),
    ("FirstDayOfYear", trunc(day, "year")),
    # Jan 1st + 11 months lands in December; last_day() then yields Dec 31st.
    ("LastDayOfYear", last_day(trunc(day, "year") + expr("INTERVAL 11 MONTH"))),
    # Integer surrogate key in yyyyMMdd form, e.g. 20220101.
    ("CalendarKey", date_format(day, "yyyyMMdd").cast("int")),
]

calendar_with_attributes = dates_df
for attribute_name, attribute_expr in calendar_attributes:
    calendar_with_attributes = calendar_with_attributes.withColumn(
        attribute_name, attribute_expr
    )

# Final column order of the dimension table (the helper "date" column is dropped).
calendar_columns = [
    "CalendarKey", "CalendarDate", "Day", "DayOfWeek", "DayName", "IsWeekend",
    "DayOfYear", "WeekOfYear", "Month", "MonthName", "Quarter", "Year",
    "FirstDayOfMonth", "LastDayOfMonth", "FirstDayOfYear", "LastDayOfYear",
]
calendar_df = calendar_with_attributes.select(*calendar_columns)

# Persist as a Delta table, replacing any existing contents.
calendar_writer = calendar_df.write.format("delta").mode("overwrite")
calendar_writer.saveAsTable("neighbor.silver.calendar")

display(calendar_df)

In [0]:
from pyspark.sql.functions import (
    lit, expr, date_format, concat_ws, lpad, col
)

# Number of equal-length intervals the day is divided into (48 = half-hour slots).
IntervalsInDay = 48

# Derive the interval geometry from IntervalsInDay rather than hard-coding
# "2 intervals per hour, 30 minutes each". Previously, changing the constant
# alone left the /2, %2*30 and +29 arithmetic untouched, which would generate
# hours above 23 and therefore invalid timestamps.
MinutesInDay = 24 * 60
if IntervalsInDay <= 0 or MinutesInDay % IntervalsInDay != 0:
    raise ValueError(
        f"IntervalsInDay must be a positive divisor of {MinutesInDay}, "
        f"got {IntervalsInDay}"
    )
MinutesPerInterval = MinutesInDay // IntervalsInDay
if 60 % MinutesPerInterval != 0:
    # Intervals must not straddle an hour boundary, otherwise the
    # Hour/Minute/EndTime arithmetic below would not hold.
    raise ValueError(
        "IntervalsInDay must yield intervals that divide an hour evenly, "
        f"got {MinutesPerInterval}-minute intervals"
    )
IntervalsPerHour = 60 // MinutesPerInterval

TimeDF = (
    # One row per interval, numbered 0 .. IntervalsInDay - 1.
    spark.range(0, IntervalsInDay)
    # Hour of day the interval starts in (0-23).
    .withColumn("Hour", (col("id") / IntervalsPerHour).cast("int"))
    # Minute within the hour at which the interval starts.
    .withColumn("Minute", (col("id") % IntervalsPerHour) * MinutesPerInterval)
    # Integer key built from zero-padded HHMM, e.g. 13:30 -> 1330, 00:30 -> 30.
    .withColumn(
        "TimeKey",
        concat_ws(
            "",
            lpad(col("Hour").cast("string"), 2, "0"),
            lpad(col("Minute").cast("string"), 2, "0")
        ).cast("int")
    )
    # Start/end are rendered as strings from a timestamp on an arbitrary
    # anchor date (2000-01-01); only the time-of-day part is kept.
    # NOTE(review): the "XXX" suffix emits a zone offset, which presumably
    # follows the Spark session time zone - confirm this is intended.
    .withColumn(
        "StartTime",
        date_format(expr("make_timestamp(2000,1,1,Hour,Minute,0)"), "HH:mm:ss.SSSXXX")
    )
    # Last second of the interval: start minute + (length - 1) minutes, 59 seconds.
    .withColumn(
        "EndTime",
        date_format(
            expr(f"make_timestamp(2000,1,1,Hour,Minute+{MinutesPerInterval - 1},59)"),
            "HH:mm:ss.SSSXXX"
        )
    )
    .withColumn("AMPM", expr("CASE WHEN Hour < 12 THEN 'AM' ELSE 'PM' END"))
    # Day-part flags; the four hour ranges together cover 0-23 without overlap.
    .withColumn("IsMorning", expr("Hour BETWEEN 5 AND 11"))
    .withColumn("IsAfternoon", expr("Hour BETWEEN 12 AND 16"))
    .withColumn("IsEvening", expr("Hour BETWEEN 17 AND 20"))
    .withColumn("IsNight", expr("Hour BETWEEN 21 AND 23 OR Hour BETWEEN 0 AND 4"))
    .select(
        "TimeKey", "StartTime", "EndTime", "Hour", "Minute", "AMPM",
        "IsMorning", "IsAfternoon", "IsEvening", "IsNight"
    )
)

# Persist as a Delta table, replacing any existing contents.
TimeDF.write.format("delta").mode("overwrite").saveAsTable("neighbor.silver.time")

display(TimeDF)