In [7]:
# --------------------------------------------------
# Gold notebook parameters (Fabric-safe)
# --------------------------------------------------

def get_widget_or_default(widget_name: str, default_value: str) -> str:
    """Look up a notebook widget value, falling back to a default.

    Args:
        widget_name: Name of the widget to read through ``dbutils.widgets.get``.
        default_value: Value returned when the lookup raises for any reason.

    Returns:
        The widget value when the lookup succeeds, otherwise ``default_value``.

    Note:
        Any ``Exception`` triggers the fallback. That includes ``NameError``
        when ``dbutils`` is not defined in the running environment, so the
        function never raises on a failed lookup.
    """
    try:
        resolved_value = dbutils.widgets.get(widget_name)
    except Exception:
        # Deliberate best-effort: an unavailable widget (or missing dbutils)
        # means "use the default", not "abort the notebook".
        resolved_value = default_value
    return resolved_value


def _parse_date_param(param_name: str, raw_value: str):
    """Parse a notebook date parameter given as ``YYYY-MM-DD``.

    Args:
        param_name: Parameter name, used only in the error message.
        raw_value: Raw text of the parameter.

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: If ``raw_value`` is not a valid ``YYYY-MM-DD`` calendar date.
    """
    # Local import keeps this parameter cell runnable on its own.
    from datetime import datetime

    try:
        return datetime.strptime(str(raw_value).strip(), "%Y-%m-%d").date()
    except ValueError as exc:
        raise ValueError(
            f"{param_name} must be a valid YYYY-MM-DD date, got {raw_value!r}"
        ) from exc


gold_run_id = get_widget_or_default("gold_run_id", "MANUAL_RUN")

# Validate the date bounds at the parameter boundary so a malformed value
# fails here with a clear message instead of surfacing later as an empty
# date range or a confusing count mismatch.
_start_bound = _parse_date_param(
    "start_date", get_widget_or_default("start_date", "2015-01-01")
)
_end_bound = _parse_date_param(
    "end_date", get_widget_or_default("end_date", "2035-12-31")
)

if _start_bound > _end_bound:
    raise ValueError(
        f"start_date ({_start_bound}) must not be after end_date ({_end_bound})"
    )

# Keep the parameters as normalized YYYY-MM-DD strings, which is the form
# the rest of the notebook consumes.
start_date = _start_bound.isoformat()
end_date   = _end_bound.isoformat()

print(f"gold_run_id = {gold_run_id}")
print(f"start_date = {start_date}")
print(f"end_date   = {end_date}")


StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 9, Finished, Available, Finished)

gold_run_id = MANUAL_RUN
start_date = 2015-01-01
end_date   = 2035-12-31


In [8]:
from pyspark.sql import functions as F

# One row per calendar day from start_date to end_date, both inclusive.
# The bounds are passed as literal column values rather than spliced into
# SQL text, so quotes or SQL syntax inside a parameter cannot change the query.
df_dates = (
    spark
    .range(1)  # single seed row; the exploded sequence fans it out into days
    .select(
        F.explode(
            F.sequence(
                F.to_date(F.lit(start_date)),
                F.to_date(F.lit(end_date)),
                F.expr("interval 1 day"),
            )
        ).alias("date_value")
    )
)


StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 10, Finished, Available, Finished)

In [9]:
from pyspark.sql import functions as F

# Derive the calendar attributes of the date dimension from date_value.
# NOTE(review): per the Spark docs weekofyear follows ISO-8601 week numbering,
# while year_number is the calendar year — confirm consumers do not pair the
# two around year boundaries.
df_dim_date = (
    df_dates
    .select(
        "*",
        # Integer surrogate key in yyyyMMdd form.
        F.date_format(F.col("date_value"), "yyyyMMdd").cast("int").alias("date_id"),
        F.dayofmonth(F.col("date_value")).alias("day_of_month"),
        # dayofweek is 1 for Sunday; remap so Monday is 1 and Sunday is 7.
        F.when(F.dayofweek(F.col("date_value")) == 1, F.lit(7))
        .otherwise(F.dayofweek(F.col("date_value")) - 1)
        .alias("day_of_week_iso"),
        F.date_format(F.col("date_value"), "EEEE").alias("day_name"),
        F.weekofyear(F.col("date_value")).alias("week_of_year"),
        F.month(F.col("date_value")).alias("month_number"),
        F.date_format(F.col("date_value"), "MMMM").alias("month_name"),
        F.quarter(F.col("date_value")).alias("quarter_number"),
        F.year(F.col("date_value")).alias("year_number"),
    )
    # Depends on day_of_week_iso, so it is added after the select above.
    .withColumn("is_weekend", F.col("day_of_week_iso").isin(6, 7))
)



StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 11, Finished, Available, Finished)

In [10]:
# Append the load-audit columns: the run identifier of this execution and
# the timestamp at which the rows were produced.
df_dim_date_final = df_dim_date.select(
    "*",
    F.lit(gold_run_id).alias("gold_run_id"),
    F.current_timestamp().alias("gold_load_ts"),
)


StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 12, Finished, Available, Finished)

In [11]:
# Expected number of rows: days between the bounds, inclusive on both ends.
# The bounds are supplied as literal values rather than interpolated into
# SQL text, so a parameter containing quotes cannot alter the query.
expected_count = (
    spark
    .range(1)
    .select(
        (
            F.datediff(
                F.to_date(F.lit(end_date)),
                F.to_date(F.lit(start_date)),
            )
            + 1
        ).alias("cnt")
    )
    .first()["cnt"]
)

actual_count = df_dim_date_final.count()

# Abort before anything is written if the generated range is incomplete.
if actual_count != expected_count:
    raise ValueError(
        f"dim_date count mismatch: expected {expected_count}, got {actual_count}"
    )


StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 13, Finished, Available, Finished)

In [12]:
# Replace the table contents in a single Delta overwrite instead of
# TRUNCATE followed by append. The two-step form leaves gold_dim_date empty
# if the write fails after the truncate, and it fails outright when the
# table does not exist yet.
(
    df_dim_date_final
    .write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("gold_dim_date")
)


StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 14, Finished, Available, Finished)

In [13]:
# Post-load sanity check: show the covered date range and the row count
# of the table that was just written.
(
    spark
    .table("gold_dim_date")
    .agg(
        F.min("date_value").alias("min_date"),
        F.max("date_value").alias("max_date"),
        F.count("*").alias("row_count"),
    )
    .show()
)


StatementMeta(, cf49562e-52e1-4d7b-b6e9-bc051b1aef8d, 15, Finished, Available, Finished)

+----------+----------+---------+
|  min_date|  max_date|row_count|
+----------+----------+---------+
|2015-01-01|2035-12-31|     7670|
+----------+----------+---------+

