In [0]:
%load_ext autoreload
%autoreload 2

import sys
from config.table_config import PROJECT_PATH

# Put PROJECT_PATH on the import search path. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config import *
from utils import *
from pyspark.sql.functions import col, year, month, dayofmonth, dayofweek, \
    quarter, weekofyear, date_format, expr
from datetime import datetime, timedelta

# Apply the project's Spark configuration to the session.
# NOTE(review): `configure_spark` is not defined in this notebook (presumably it
# arrives via the star imports above) and `spark` is assumed to be the session
# object injected by the notebook runtime -- confirm both.
configure_spark(spark)

In [0]:
def date_dimension_transform(source_dfs):
    """
    Generate the date dimension: one row per calendar day from
    2020-01-01 through 2027-12-31 (inclusive).

    The date dimension is generated rather than read from silver, so
    ``source_dfs`` is ignored; the parameter exists only because the
    transform signature requires it.

    Relies on a ``spark`` session being available in the notebook's
    global scope.

    Args:
        source_dfs: Unused.

    Returns:
        A Spark DataFrame with the columns, in order: date_key, date,
        year, quarter, month, day, day_of_week, week_of_year, month_name,
        day_name, is_weekend, is_winter, is_spring, is_summer, is_fall,
        fiscal_year.
    """

    # Generate date range (both endpoints included).
    start_date = datetime(2020, 1, 1)
    end_date = datetime(2027, 12, 31)
    total_days = (end_date - start_date).days + 1

    # One single-element tuple per day, as expected by createDataFrame.
    dates = [
        (start_date + timedelta(days=offset),)
        for offset in range(total_days)
    ]

    # Create DataFrame. The values are Python datetimes, so the "date"
    # column is created as a timestamp; "date_key" below is the DATE-typed
    # version of the same value.
    date_df = spark.createDataFrame(dates, ["date"])

    # Reusable column expressions derived from the only real input column.
    date_col = col("date")
    month_number = month(date_col)
    weekday_number = dayofweek(date_col)  # Spark: 1 = Sunday ... 7 = Saturday

    # Add dimension attributes
    dim_date = date_df.select(
        # Key
        date_col.cast("date").alias("date_key"),
        date_col,

        # Date parts
        year(date_col).alias("year"),
        quarter(date_col).alias("quarter"),
        month_number.alias("month"),
        dayofmonth(date_col).alias("day"),
        weekday_number.alias("day_of_week"),
        weekofyear(date_col).alias("week_of_year"),

        # Names
        date_format(date_col, "MMMM").alias("month_name"),
        date_format(date_col, "EEEE").alias("day_name"),

        # Flags (isin already yields a boolean column)
        weekday_number.isin([1, 7]).alias("is_weekend"),
        month_number.isin([12, 1, 2]).alias("is_winter"),
        month_number.isin([3, 4, 5]).alias("is_spring"),
        month_number.isin([6, 7, 8]).alias("is_summer"),
        month_number.isin([9, 10, 11]).alias("is_fall"),

        # Fiscal year (July 1 start). Computed directly from `date` rather
        # than from the sibling aliases `month` / `year`: those names are not
        # columns of date_df, and referencing them inside the same select only
        # resolves when Spark's lateral-column-alias resolution is available
        # and enabled.
        expr(
            "CASE WHEN month(`date`) >= 7 "
            "THEN year(`date`) + 1 "
            "ELSE year(`date`) END"
        ).alias("fiscal_year"),
    )

    return dim_date

In [0]:
# Build the gold-layer job for dim_date and run it with the generator defined
# above as its transform logic.
# NOTE(review): GoldTransformation, CATALOG_NAME and GOLD_BASE are not defined
# in this notebook (presumably they arrive via the star imports from
# config/utils); the exact effect of clustering_cols, scd_type and write_mode
# is determined there -- confirm against that implementation.
GoldTransformation(
    spark=spark,
    table_display_name="dim_date",
    table_type="dimension",
    source_tables=[],  # No source tables - the transform generates its own rows
    target_table=f"{CATALOG_NAME}.gold.dim_date"
).run(
    catalog_name=CATALOG_NAME,
    gold_schema="gold",
    gold_container=GOLD_BASE,
    transform_logic=date_dimension_transform,
    clustering_cols=["year", "month"],
    scd_type=1,
    write_mode="overwrite",
    add_metadata=False  # Date dimension doesn't need created_at/updated_at
)