In [0]:
def handle_transaction_fact(df, config):
    """Insert the transactions in ``df`` into the gold fact table.

    The new rows are appended to the Delta table named by
    ``config.gold_table_name``; rows already stored there are left untouched.

    Args:
        df: DataFrame of new transaction rows. Assumed to carry the same
            columns as the gold table (TODO confirm against the caller).
        config: Object exposing ``gold_table_name``, the target table name.
    """
    # Append rather than read + union + overwrite: the end state is the same
    # (existing rows plus new rows), but the table is not rewritten on every
    # load, and saveAsTable matches columns by name, whereas DataFrame.union
    # matches them by position.
    df.write.format("delta").mode("append").saveAsTable(config.gold_table_name)

In [0]:
def handle_accumulating_snapshot(df, config):
    """Upsert ``df`` into the gold accumulating-snapshot table.

    Rows are matched on the primary key. For the key and every metric column
    the incoming value wins when it is non-null; otherwise the stored value is
    kept. Keys present on only one side are carried through unchanged, so new
    keys are inserted and untouched keys are preserved. The merged result
    replaces the contents of ``config.gold_table_name``.

    Args:
        df: DataFrame of new/updated snapshot rows containing the primary key
            and metric columns named below.
        config: Object exposing ``gold_table_name``, the target table name.
    """
    key_column = "primary_key"  # Replace with your actual primary key column
    metric_columns = ("metric1", "metric2")  # Replace with your actual metric columns

    existing = spark.table(config.gold_table_name).alias("existing")
    incoming = df.alias("new")

    # Join on an explicit equality condition instead of a shared column name
    # so that both sides' key columns remain addressable through their aliases.
    joined = existing.join(
        incoming,
        on=existing[key_column] == incoming[key_column],
        how="outer",
    )

    # One output column per name: selecting both existing.<m> and new.<m>
    # would yield duplicate column names, and the key would be null for rows
    # that exist only in the incoming data.
    merged_exprs = [
        f"coalesce(new.{name}, existing.{name}) AS {name}"
        for name in (key_column, *metric_columns)
    ]
    merged_df = joined.selectExpr(*merged_exprs)

    # Write back to the gold table
    merged_df.write.format("delta").mode("overwrite").saveAsTable(config.gold_table_name)

In [0]:
def handle_periodic_snapshot(df, config):
    """Append ``df`` to the gold table as a new timestamped snapshot.

    A ``snapshot_ts`` column holding the current timestamp is added to every
    row of ``df``, and the stamped rows are appended to the Delta table named
    by ``config.gold_table_name``. Earlier snapshots are left in place.

    Args:
        df: DataFrame with the rows of the snapshot being taken.
        config: Object exposing ``gold_table_name``, the target table name.
    """
    # Fetch the timestamp once on the driver and embed it as a literal so
    # that every row of this snapshot carries the identical value.
    snapshot_timestamp = spark.sql("SELECT current_timestamp()").collect()[0][0]
    stamped_df = df.withColumn("snapshot_ts", lit(snapshot_timestamp))

    # Append instead of read + union + overwrite: prior snapshots are not
    # rewritten, and columns are matched by name rather than by position
    # (the positional union only lined up if snapshot_ts was the last column).
    stamped_df.write.format("delta").mode("append").saveAsTable(config.gold_table_name)

In [0]:
def handle_insert_only_fact(df, config):
    """Append the rows of ``df`` to the insert-only gold fact table.

    Only the new rows are written. The previous implementation unioned the
    table's existing rows with ``df`` and then appended the result, which
    re-inserted every existing row on each run.

    Args:
        df: DataFrame of new fact rows. Assumed to carry the same columns as
            the gold table (TODO confirm against the caller).
        config: Object exposing ``gold_table_name``, the target table name.
    """
    # Append mode already preserves what is stored; write just the new rows.
    df.write.format("delta").mode("append").saveAsTable(config.gold_table_name)