In [0]:
def handle_scd_type_1(df, gold_table_name):
    """Upsert ``df`` into the gold table using SCD Type 1 (overwrite) semantics.

    Rows in ``df`` replace gold rows that share the same primary key, rows in
    ``df`` with a new key are added, and gold rows whose key does not appear
    in ``df`` are kept unchanged. The combined result is written back over the
    gold table in Delta format.

    The previous implementation outer-joined the two frames and then selected
    only the ``new.*`` columns, which turned every gold row missing from
    ``df`` into a row of NULLs.

    Args:
        df: DataFrame holding the incoming records. It must contain the key
            column and every column listed in ``columns`` below.
        gold_table_name: Name of the gold table to read and overwrite.

    Note:
        Relies on a ``spark`` session being available in the enclosing scope
        (it is not defined in this function).
    """
    key_column = "primary_key"  # Replace with your actual primary key column
    columns = [
        key_column,
        "column1",
        "column2",
        # Add other columns as needed
    ]

    # Read the existing records from the gold table
    existing_df = spark.table(gold_table_name)

    # Gold rows whose key is NOT present in the incoming data must survive
    # untouched; a left_anti join keeps exactly those rows.
    untouched_df = existing_df.join(
        df.select(key_column),
        on=key_column,
        how="left_anti",
    ).select(*columns)

    # Incoming rows win for every key they carry (updates and inserts alike).
    updated_df = df.select(*columns).unionByName(untouched_df)

    # Write back to the gold table
    updated_df.write.format("delta").mode("overwrite").saveAsTable(gold_table_name)

In [0]:
def handle_scd_type_2(df, gold_table_name):
    """Apply ``df`` to the gold table using SCD Type 2 (history-keeping) semantics.

    For each incoming row:

    * unknown ``business_key``: a new current row is inserted;
    * known key whose tracked value changed: the current row is closed
      (``valid_to`` set, ``is_current = 0``) AND a new current row carrying
      the incoming value is inserted;
    * known key with an unchanged value: nothing happens.

    The previous MERGE only closed the current row on a change; because
    ``WHEN NOT MATCHED`` never fires for a key that already exists, the new
    version was never inserted. The fix stages each changed row a second time
    with a NULL merge key so that it falls through to the INSERT branch.

    Args:
        df: DataFrame holding the incoming records; registered as the temp
            view ``incoming_data`` (replacing any view with that name).
        gold_table_name: Name of the gold table to merge into. It is
            interpolated into the SQL text as-is, so it must be a trusted
            identifier.

    Note:
        Relies on a ``spark`` session being available in the enclosing scope
        (it is not defined in this function).
    """
    # Create a temporary view for the incoming data
    df.createOrReplaceTempView("incoming_data")

    # NOTE(review): the change test compares incoming.new_column with
    # existing.existing_column exactly as the original did, while the INSERT
    # writes to new_column - confirm the intended gold column names.
    # NOTE(review): `!=` yields NULL when either side is NULL, so a change
    # to or from NULL is not detected; consider `NOT (a <=> b)` if that matters.
    merge_sql = f"""
    MERGE INTO {gold_table_name} AS existing
    USING (
        -- Every incoming row keyed normally: closes changed rows, inserts new keys
        SELECT incoming_data.business_key AS merge_key, incoming_data.*
        FROM incoming_data
        UNION ALL
        -- Changed rows again with a NULL key so they never match and get inserted
        SELECT NULL AS merge_key, incoming_data.*
        FROM incoming_data
        JOIN {gold_table_name} AS current_rows
            ON incoming_data.business_key = current_rows.business_key
        WHERE current_rows.valid_to IS NULL
            AND incoming_data.new_column != current_rows.existing_column
    ) AS incoming
    ON existing.business_key = incoming.merge_key
    WHEN MATCHED AND existing.valid_to IS NULL AND incoming.new_column != existing.existing_column THEN
        UPDATE SET 
            existing.valid_to = current_timestamp(),
            existing.is_current = 0  -- Mark old record as inactive
    WHEN NOT MATCHED THEN
        INSERT (business_key, new_column, valid_from, valid_to, is_current)
        VALUES (incoming.business_key, incoming.new_column, current_timestamp(), NULL, 1)  -- Insert new record
    """

    # Execute the MERGE statement
    spark.sql(merge_sql)