In [0]:
import dlt
from pyspark.sql.functions import col

# Bronze: raw ingestion
@dlt.table
def customers_bronze():
    """Bronze layer: load the raw customer CSV files without transformation.

    Performs a batch read of the ``customers_raw`` volume directory, treating
    the first line of each file as the header row. No explicit schema is
    supplied, so column types are left to the CSV reader's defaults.

    Returns:
        DataFrame: the raw customer records exactly as read from the files.
    """
    raw_path = "/Volumes/sivaadbuc/default/batch18test/customers_raw"
    csv_reader = spark.read.format("csv").option("header", "true")
    return csv_reader.load(raw_path)

# Silver: cleaned
@dlt.table
@dlt.expect_all_or_drop({
    # customer_id is the business key and update_timestamp the ordering column
    # of the downstream apply_changes flow. Missing or uncastable source values
    # can leave NULLs in either column after the casts below, so such rows are
    # dropped here (and counted in the pipeline's data-quality metrics) instead
    # of reaching the SCD2 merge.
    "valid_customer_id": "customer_id IS NOT NULL",
    "valid_update_timestamp": "update_timestamp IS NOT NULL",
})
def customers_silver():
    """Silver layer: typed, column-pruned customer records.

    Reads ``customers_bronze``, casts ``customer_id`` to ``int`` and
    ``update_timestamp`` to ``timestamp``, and keeps only the four columns
    used downstream. The expectations on the decorator are evaluated against
    the returned DataFrame, i.e. after the casts have been applied.

    Returns:
        DataFrame: columns ``customer_id``, ``name``, ``city`` and
        ``update_timestamp``, restricted to rows where both ``customer_id``
        and ``update_timestamp`` are non-NULL.
    """
    bronze_df = dlt.read("customers_bronze")
    typed_df = (
        bronze_df
        .withColumn("customer_id", col("customer_id").cast("int"))
        .withColumn("update_timestamp", col("update_timestamp").cast("timestamp"))
    )
    return typed_df.select("customer_id", "name", "city", "update_timestamp")

# ✅ Declare the target SCD2 table (must exist in pipeline definition)
# Single definition of the target name, used by both the declaration and the
# CDC flow below so the two calls cannot drift apart.
SCD2_TARGET = "customers_scd2"

dlt.create_streaming_table(
    name=SCD2_TARGET,
    comment="SCD Type 2 managed table for customers",
)

# Apply SCD Type 2 changes: rows from customers_silver are matched on
# customer_id and ordered by update_timestamp; history is kept (type 2).
# NOTE(review): customers_silver is built with dlt.read on top of a spark.read
# bronze table (both batch reads in this file). Confirm that a non-streaming
# source behaves as intended for apply_changes across repeated pipeline updates.
dlt.apply_changes(
    target=SCD2_TARGET,
    source="customers_silver",
    keys=["customer_id"],
    sequence_by=col("update_timestamp"),
    stored_as_scd_type=2,
)


In [0]:
# dlt.apply_changes(
#     # Required
#     target="target_table_name",        # Target table (must be created inside pipeline)
#     source="source_table_name",        # Source view/table (defined in pipeline)
#     keys=["col1", "col2"],             # Business keys to match records
#     sequence_by=col("event_time_col"), # Ordering column for latest record (usually timestamp)
    
#     # SCD storage type
#     stored_as_scd_type=2,              # 1 = overwrite (SCD1), 2 = history tracking (SCD2)

#     # Optional: Delete and NULL handling
#     apply_as_deletes=expr("operation = 'DELETE'"),  # Needs: from pyspark.sql.functions import expr
#     ignore_null_updates=True,          # If True → NULL columns in an update keep the target's existing values

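#     # Optional: Column handling (parameter names as in the DLT Python reference)
#     track_history_column_list=["col1", "col2"],  # SCD2 only: columns whose changes open a new history row
#     # ...or track_history_except_column_list=["last_updated_by"] to track every column except these (use one, not both)
#     except_column_list=["last_updated_by"],      # Columns to leave out of the target (alternative: column_list=[...])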

#     # Partition / Storage: these are not apply_changes arguments.
#     # Set partition_cols=["year", "month"] on dlt.create_streaming_table(...) when declaring the target.
#     # The target format is always Delta in DLT, so there is no stored_as option.
# )
