# Spark Structured Streaming with Iceberg: foreachBatch to multiple sinks

- `foreachBatch` allows caching the DataFrame being processed, so it can be written to multiple sinks without reprocessing.
- However, writes to multiple tables are serialized when using `foreachBatch`, which reduces parallelization and increases overall latency.
- Idempotent table writes are recommended:
 https://docs.databricks.com/aws/en/structured-streaming/delta-lake#idempotent-table-writes-in-foreachbatch

In [0]:
from schemas import schema, instrument_ref_schema, instrument_error_schema, instrument_risk_schema 
from pyspark.sql.functions import explode, col, from_json, schema_of_json

In [0]:

# Streaming source: read the "financial-messages" Pulsar topic, parse each
# message value as JSON using `schema`, and flatten the parsed struct into
# top-level columns.
fin_df = (
    spark.readStream.format("pulsar")
    .options(
        **{
            "service.url": "pulsar://6.tcp.us-cal-1.ngrok.io:13185",
            "topics": "financial-messages",
            "startingOffsets": "latest",
        }
    )
    .load()
    .select(from_json(col("value").cast("string"), schema).alias("value"))
    .select("value.*")
)



In [0]:
def _append_to_iceberg(df, table_name, schema_location, batch_id, app_id):
    """Append ``df`` to the Iceberg table ``table_name``.

    Args:
        df: Batch DataFrame whose rows are appended.
        table_name: Fully qualified name of the target table.
        schema_location: Value passed through as the ``schemaLocation`` option.
        batch_id: Micro-batch id, passed as the ``txnVersion`` option.
        app_id: Application id, passed as the ``txnAppId`` option.
    """
    # NOTE(review): txnVersion/txnAppId are documented by Databricks for
    # idempotent *Delta Lake* writes in foreachBatch -- confirm they are
    # honored (and not silently ignored) when the format is "iceberg".
    # NOTE(review): schemaLocation is passed through unchanged from the
    # original code; verify that it is a meaningful option for a batch writer.
    (
        df.write
            .format("iceberg")
            .mode("append")
            .option("txnVersion", batch_id)
            .option("txnAppId", app_id)
            .option("schemaLocation", schema_location)
            .saveAsTable(table_name)
    )


def write_to_iceberg(batch_df, batch_id):
    """Fan one micro-batch out to the reference, error and risk-metric tables.

    Intended to be used as a ``foreachBatch`` callback. The ``data`` array of
    each row is exploded, only elements whose ``type`` is ``"instrument"`` are
    kept, and the result is persisted so the three sequential writes reuse it
    instead of recomputing the batch. Each target table is created on first
    use if it does not exist yet.

    Args:
        batch_df: DataFrame holding the rows of the current micro-batch.
        batch_id: Id of the current micro-batch; also used as ``txnVersion``.

    Raises:
        Any exception raised by ``spark.sql`` or by a table write propagates
        to the caller; the persisted DataFrame is released first.
    """
    print(f"Processing batch_id: {batch_id}")

    # Unique application ID for idempotent writes
    app_id = "pulsar-iceberg-streaming-app"

    exploded_df = (
        batch_df
            .select(
                col("jobidentifier"),
                col("analysisidentifier"),
                explode(col("data")).alias("data_item")
            )
            .filter(col("data_item.type") == "instrument")
    )

    exploded_df.persist()
    try:
        ### Reference table
        spark.sql(f"""
            CREATE TABLE IF NOT EXISTS users.anhhoang_chu.pulsar_instrument_reference ({instrument_ref_schema})
            USING ICEBERG
            """)

        ref_df = exploded_df.select(col("data_item.instrumentreference.*"))
        _append_to_iceberg(
            ref_df,
            "users.anhhoang_chu.pulsar_instrument_reference",
            "/Volumes/users/anhhoang_chu/iceberg/pulsar/ref_schema",
            batch_id,
            app_id,
        )
        print(f"Written reference data for batch_id: {batch_id}")

        ### Error table
        spark.sql(f"""
            CREATE TABLE IF NOT EXISTS users.anhhoang_chu.pulsar_instrument_error ({instrument_error_schema})
            USING ICEBERG""")

        # One output row per element of the instrumenterror array.
        error_df = (
            exploded_df.withColumn("errors", explode(col("data_item.instrumenterror")))
            .select("errors.*")
        )
        _append_to_iceberg(
            error_df,
            "users.anhhoang_chu.pulsar_instrument_error",
            "/Volumes/users/anhhoang_chu/iceberg/pulsar/error_schema",
            batch_id,
            app_id,
        )
        print(f"Written error data for batch_id: {batch_id}")

        ### Risk metric table
        spark.sql(f"""
            CREATE TABLE IF NOT EXISTS users.anhhoang_chu.pulsar_instrument_risk_metric({instrument_risk_schema})
            USING ICEBERG
            """)

        # One output row per element of the instrumentriskmetric array.
        risk_df = (
            exploded_df.withColumn("riskmetric", explode(col("data_item.instrumentriskmetric")))
            .select("riskmetric.*")
        )
        _append_to_iceberg(
            risk_df,
            "users.anhhoang_chu.pulsar_instrument_risk_metric",
            "/Volumes/users/anhhoang_chu/iceberg/pulsar/risk_schema",
            batch_id,
            app_id,
        )
        print(f"Written risk metric data for batch_id: {batch_id}")
    finally:
        # Always release the cached batch, even when a write fails; otherwise
        # a failed batch would leave its data pinned in executor storage.
        exploded_df.unpersist()


# Start the streaming query: every micro-batch of fin_df is handed to
# write_to_iceberg. Left as a bare expression so the notebook cell still
# displays the returned StreamingQuery.
(
    fin_df.writeStream
    .outputMode("append")
    .option("checkpointLocation", "/Volumes/users/anhhoang_chu/iceberg/pulsar/_checkpoints")
    .trigger(availableNow=True)
    .foreachBatch(write_to_iceberg)
    .start()
)

<pyspark.sql.streaming.query.StreamingQuery at 0x7fecabaf5400>

In [0]:
%sql
-- Inspect the rows appended to the instrument reference table by write_to_iceberg.
select * from users.anhhoang_chu.pulsar_instrument_reference;

analysisidentifier,instrumentidentifier,asofdate,accountidentifier,accountname,instrumentname,description,instrumenttype,instrumentsubtype,consumerproductcategory,originationdate,maturitydate,amortizationtype,amortizationenddate,isinterestonly,cashflowtype,instrumentcurrency,notionalportion,unpaidprincipalbalance,currentcommitmentamount,marketpriceoverride,fixedpaymentamount,currentbookpriceoverride,interestratetype,interestpaymentfrequency,curerate,fixedrate,currentrate,portfolioidentifier,interestratespread,interestrateindexmultiplier,interestrateindex,lifetimeinterestratecap,lifetimeinterestratefloor,periodicinterestratecap,periodicratefloor,interestrateresetfirstdate,interestrateresetfrequency,daycount,optionadjustedspreadoverride,modified,parmarketprice,servicingspread,company,discountcurve,accountside,jobidentifier,cashfloworder,cashflowsource,cashflowmodelname,prepaymentorder,prepaymentsource,prepaymentmodelname,prepaymentshift,prepaymentscalingfactor
9b676327-34e3-4b26-9cce-c04e5b4c313c,Bond_78bdcefc,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,cb6c6724-98d4-40b6-8fb2-ac35a55dc9d6,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
585a0335-b676-485b-9945-0144c9c9f5f2,Bond_3016acdb,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,3f61ed8d-e34e-44fa-8dcd-3a333ee06d8a,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
ff05644d-3f68-428e-a7c2-f4423187aede,Bond_33442b00,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,422189f4-3a7e-4bc0-b4af-39c7ef5e8206,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
a4b809e0-8948-4660-b258-bc77ff22c619,Bond_e0c25114,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,23f52345-01e8-4443-a431-71b102f9d75d,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
d9f19bd9-8112-48be-baea-1701f3417599,Bond_34207534,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,ebd51ad8-1606-44b0-914d-9603728047a6,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
d4f9be64-ce56-44d1-a43d-ae8644b18ff9,Bond_3d6e2d55,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,63b532a1-038c-48eb-9136-4a373626f5d6,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
4f74f268-c8be-4542-a574-5da7c83a489a,Bond_c6b6367b,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,cb91dd58-692a-49c5-a3fd-e654b52ca003,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
921f22ff-17c2-473c-a91b-f3f99952a84f,Bond_058c434c,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,8e426581-bff8-4ba1-9504-690d8e93df24,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0
7b371656-3277-4004-b2b2-a3f284ea9a9e,Bond_aa318ef2,2025-07-22,TPS/CD/CP_AFS,,,Corporate Bond Investment,Bond,,,2025-06-22,2030-07-21,Constant installment,2030-07-21,,,USD,,1000000,,102.5,,100,Fixed,Semi-Annual,,,0.0325,CORPORATE_BONDS_01,0.0,0.0,10YT,99.0,0.0,99.0,0.0,,,30/360,0.0,,,0.0,Financial Corp,TreasuryYield,Asset,9f1c4bb7-80cb-4c81-b184-03235975fdf8,10000,API model,Standard Cash Flow Model,10000,Statistical model,Standard Prepayment Model,0.0,1.0


In [0]:
%sql
-- Inspect the rows appended to the instrument error table by write_to_iceberg.
select * from users.anhhoang_chu.pulsar_instrument_error;

analysisidentifier,jobidentifier,instrumentidentifier,errorcode,errormessage,modulecode,asofdate,scenarioidentifier,severity,portfolioidentifier
ff05644d-3f68-428e-a7c2-f4423187aede,422189f4-3a7e-4bc0-b4af-39c7ef5e8206,Bond_33442b00,1,Warning: High prepayment rate detected for instrument Bond_33442b00,101,,,Warning,CORPORATE_BONDS_01
9b676327-34e3-4b26-9cce-c04e5b4c313c,cb6c6724-98d4-40b6-8fb2-ac35a55dc9d6,Bond_78bdcefc,1,Warning: High prepayment rate detected for instrument Bond_78bdcefc,101,,,Warning,CORPORATE_BONDS_01
585a0335-b676-485b-9945-0144c9c9f5f2,3f61ed8d-e34e-44fa-8dcd-3a333ee06d8a,Bond_3016acdb,1,Warning: High prepayment rate detected for instrument Bond_3016acdb,101,,,Warning,CORPORATE_BONDS_01
a4b809e0-8948-4660-b258-bc77ff22c619,23f52345-01e8-4443-a431-71b102f9d75d,Bond_e0c25114,1,Warning: High prepayment rate detected for instrument Bond_e0c25114,101,,,Warning,CORPORATE_BONDS_01
d9f19bd9-8112-48be-baea-1701f3417599,ebd51ad8-1606-44b0-914d-9603728047a6,Bond_34207534,1,Warning: High prepayment rate detected for instrument Bond_34207534,101,,,Warning,CORPORATE_BONDS_01
d4f9be64-ce56-44d1-a43d-ae8644b18ff9,63b532a1-038c-48eb-9136-4a373626f5d6,Bond_3d6e2d55,1,Warning: High prepayment rate detected for instrument Bond_3d6e2d55,101,,,Warning,CORPORATE_BONDS_01
4f74f268-c8be-4542-a574-5da7c83a489a,cb91dd58-692a-49c5-a3fd-e654b52ca003,Bond_c6b6367b,1,Warning: High prepayment rate detected for instrument Bond_c6b6367b,101,,,Warning,CORPORATE_BONDS_01
7b371656-3277-4004-b2b2-a3f284ea9a9e,9f1c4bb7-80cb-4c81-b184-03235975fdf8,Bond_aa318ef2,1,Warning: High prepayment rate detected for instrument Bond_aa318ef2,101,,,Warning,CORPORATE_BONDS_01
921f22ff-17c2-473c-a91b-f3f99952a84f,8e426581-bff8-4ba1-9504-690d8e93df24,Bond_058c434c,1,Warning: High prepayment rate detected for instrument Bond_058c434c,101,,,Warning,CORPORATE_BONDS_01


In [0]:
%sql
-- Inspect the rows appended to the instrument risk metric table by write_to_iceberg.
select * from users.anhhoang_chu.pulsar_instrument_risk_metric;

analysisidentifier,reportingdate,inputscenarioidentifier,instrumentidentifier,scenarioidentifier,modelname,modeloutput,asofdate,term,timesegment,annualizedcumulativepd,forwardpd,cumulativepd,marginalpd,maturityriskpd,maturityriskel,lgd,maturityrisklgd,lossrateannualized,lossratecumulative,ead,ccf,ugd,prepaymentrate,forwardprepaymentrate,cumulativeprepaymentrate,recovery,netchargeoff,annualizedpdoneyearprojection,stage1conditionalannualizedcumulativepd,stage2conditionalannualizedcumulativepd,stage3conditionalannualizedcumulativepd,impliedstagerating,netchargeoffamount,collateralvalue,expectedcreditlossamount,expectedcreditlossamountlifetimeprojection,expectedcreditlossamountoneyearprojection,exposure,grossinterestincome,totalinterestexpense,riskweightedassets,stage1portion,stage2portion,stage3portion,transitionprobabilityfromstage1tostage2,transitionprobabilityfromstage1tostage3,transitionprobabilityfromstage2tostage1,transitionprobabilityfromstage2tostage3,transitionprobabilityfromstage3tostage2,balancegrowthrate,lgdvariance,transactionsequence,creditotherthantemporaryimpairment,noncreditotherthantemporaryimpairment,temporaryimpairment,othercomprehensiveincome,otherthantemporaryimpairmentprobability,jobidentifier,valuedate,decayrate,rateresponserate,usagerate,liquidityhaircut,singlemonthlymortalityrate,edfimpliedrating,optionarmminimumpaymentportion,optionarminterestonlyportion,optionarmprincipalandinterestportion,forbearanceportion,forwarddecayrate
,,,Bond_3016acdb,Base,Standard Risk Model,Risk Assessment,2025-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_3016acdb,Base,Standard Risk Model,Risk Assessment,2026-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_78bdcefc,Base,Standard Risk Model,Risk Assessment,2025-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_78bdcefc,Base,Standard Risk Model,Risk Assessment,2026-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_33442b00,Base,Standard Risk Model,Risk Assessment,2025-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_33442b00,Base,Standard Risk Model,Risk Assessment,2026-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_3d6e2d55,Base,Standard Risk Model,Risk Assessment,2025-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_3d6e2d55,Base,Standard Risk Model,Risk Assessment,2026-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_c6b6367b,Base,Standard Risk Model,Risk Assessment,2025-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
,,,Bond_c6b6367b,Base,Standard Risk Model,Risk Assessment,2026-07-22,1.0,,0.012,0.011,0.012,0.001,,,0.45,,0.0054,0.0054,1000000.0,,,,0.15,,0.55,,0.013,0.01,0.05,0.95,Investment Grade,,,5400.0,27000.0,5400.0,1000000.0,32500.0,,750000.0,0.85,0.12,0.03,0.05,0.002,0.15,0.08,0.1,0.02,,,,,,,,,,,,,,,BBB,,,,,
