In [0]:
%sql
-- Make sure the silver schema exists before any table is written into it.
CREATE SCHEMA IF NOT EXISTS anirvandecodes.silver;

In [0]:
# Source (bronze) and target (silver) tables, as catalog.schema.table names.
bronze_table = "anirvandecodes.bronze.hospital_raw"
silver_table = "anirvandecodes.silver.dim_hospital"

# Checkpoint directory passed to the streaming write as its checkpointLocation.
checkpoint_path = (
    "abfss://data@anirvandecodesdata.dfs.core.windows.net/silver/dim_hospital/checkpoint/"
)

In [0]:
from pyspark.sql.functions import sha2, col, current_timestamp, monotonically_increasing_id

In [0]:
%sql
-- Inspect the raw hospital records that feed the silver dimension.
SELECT *
FROM anirvandecodes.bronze.hospital_raw;

In [0]:
# Streaming read of the bronze hospital table.
df_hospital_bronze = spark.readStream.table(bronze_table)

# One row per hospital_id, stamped with the time the row was processed.
# NOTE(review): despite its name, df_patient_clean holds hospital rows; the
# name is kept because the writeStream cell further down refers to it.
# NOTE(review): on a stream without a watermark, dropDuplicates presumably
# remembers every hospital_id it has seen, so a later changed record for an
# existing hospital would never reach the MERGE - confirm this is intended.
deduplicated_hospitals = df_hospital_bronze.dropDuplicates(["hospital_id"])
df_patient_clean = deduplicated_hospitals.withColumn(
    "load_timestamp", current_timestamp()
)



In [0]:
from delta.tables import DeltaTable

def merge_dim_hospital(batch_df, batch_id):
    """Upsert one streaming micro-batch into the silver hospital dimension.

    Intended to be used as a ``foreachBatch`` sink. If ``silver_table`` does
    not exist yet, the batch is written out as a new Delta table; otherwise
    the batch is merged into it on ``hospital_id`` (matched rows are fully
    updated, unmatched rows are inserted).

    Args:
        batch_df: DataFrame holding the rows of the current micro-batch.
        batch_id: Micro-batch identifier supplied by Structured Streaming.
            Not used by this function.
    """
    # Use the session attached to the micro-batch instead of the notebook
    # global `spark`, which is not reliably available inside foreachBatch
    # (e.g. on Spark Connect / standard access mode compute).
    session = batch_df.sparkSession

    # A Delta MERGE errors out when several source rows match the same target
    # row, so enforce one row per key here rather than trusting the caller.
    source_df = batch_df.dropDuplicates(["hospital_id"])

    if not session.catalog.tableExists(silver_table):
        # First run: the batch itself becomes the initial table contents.
        source_df.write.format("delta").mode("overwrite").saveAsTable(silver_table)
        return

    # Load Delta table by name and upsert
    dim_hospital = DeltaTable.forName(session, silver_table)

    (dim_hospital.alias("t")
        .merge(
            source_df.alias("s"),
            "t.hospital_id = s.hospital_id"
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute())

In [0]:
# Process everything currently available in bronze, upserting each micro-batch
# into the silver dimension, then stop (availableNow trigger).
dim_hospital_query = (
    df_patient_clean.writeStream
        .foreachBatch(merge_dim_hospital)
        .outputMode("update")
        .trigger(availableNow=True)
        .option("checkpointLocation", checkpoint_path)
        .start()
)

# start() returns immediately. Block until the run has finished so that later
# cells read the fully loaded table, and so that a failure inside the stream
# is raised here instead of passing silently.
dim_hospital_query.awaitTermination()


In [0]:
%sql
-- Inspect the hospital dimension after the load.
SELECT *
FROM anirvandecodes.silver.dim_hospital;