In [0]:
#-------IMPORTS-----------------------
from delta.tables import DeltaTable

In [0]:
#---UPSERT FUNCTION------------------

def upsert_function(df, catalog_path, adls_path, merge_condition, partition_columns=None):
    """Merge ``df`` into a Delta table, creating the table on first use.

    If ``catalog_path`` exists in the catalog and ``adls_path`` holds a Delta
    table, ``df`` is merged into it: rows satisfying ``merge_condition`` are
    updated with all source columns, every other source row is inserted.
    Otherwise ``df`` is written to ``adls_path`` in overwrite mode and
    registered under ``catalog_path``.

    Args:
        df: DataFrame holding the rows to upsert.
        catalog_path: Table name used for the existence check, the merge
            target lookup and ``saveAsTable``.
        adls_path: Storage location of the Delta table.
        merge_condition: SQL predicate joining the existing table (aliased
            ``target``) with ``df`` (aliased ``source``).
        partition_columns: Optional column name, or list/tuple of column
            names, used to partition the table when it is first created.
            It has no effect when merging into an existing table.
    """
    # Keep the catalog check first: `and` short-circuits, so the storage path
    # is only probed when the table is already registered.
    table_is_ready = (
        spark.catalog.tableExists(catalog_path)
        and DeltaTable.isDeltaTable(spark, adls_path)
    )

    if table_is_ready:
        (
            DeltaTable.forName(spark, catalog_path)
            .alias("target")
            .merge(df.alias("source"), merge_condition)
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute()
        )
        return

    # First load: write the data and register the table at the given location.
    writer = df.write.format("delta").mode("overwrite").option("path", adls_path)
    if partition_columns:
        # Accept a single column name as well as a list/tuple of names.
        if isinstance(partition_columns, str):
            partition_columns = [partition_columns]
        writer = writer.partitionBy(*partition_columns)
    writer.saveAsTable(catalog_path)

#-----WRITE FUNCTION------------------

def write_function(catalog_path, adls_path, merge_condition, partition_columns=None):
    """Build a ``foreachBatch`` handler that upserts each micro-batch.

    Args:
        catalog_path: Table name forwarded to ``upsert_function``.
        adls_path: Storage location forwarded to ``upsert_function``.
        merge_condition: Merge predicate forwarded to ``upsert_function``.
        partition_columns: Optional partition column(s) forwarded to
            ``upsert_function``; defaults to ``None``.

    Returns:
        A ``(batch_df, batch_id)`` callable. It does nothing for an empty
        batch and otherwise passes the batch to ``upsert_function``.
    """
    def validate_function(batch_df, batch_id):
        # Nothing to upsert for an empty micro-batch.
        if batch_df.isEmpty():
            return
        upsert_function(
            batch_df,
            catalog_path,
            adls_path,
            merge_condition,
            partition_columns=partition_columns,
        )

    return validate_function

In [0]:
#---CONSTANTS AND PARAMETERS-----

# Root folder of the landed source files.
LANDING_PATH = "abfss://landingcontainer@rohegastorage.dfs.core.windows.net/pipeline01/"
# Root folder for the per-table schema and checkpoint locations.
CHECKPOINTS_PATH = "abfss://bronzecontainer@rohegastorage.dfs.core.windows.net/checkpoints/"
# Root folder of the bronze Delta tables.
BRONZE_PATH = "abfss://bronzecontainer@rohegastorage.dfs.core.windows.net/pipeline01/"

# One entry per table: its name and the merge predicate matching the existing
# table ("target") to incoming rows ("source") on the table's key column.
table_parameters = [
    {
        "name": table_name,
        "merge_condition": f"target.{key_column} = source.{key_column}",
    }
    for table_name, key_column in (
        ("afiliacion_usuarios", "id_usuarioAfiliacion"),
        ("distribuidores", "id_distribuidor"),
        ("distribuidores_perfiles", "id_perfil"),
        ("prospectos", "id_prospecto"),
        ("referidos_origenes", "id_origen"),
        ("regiones", "id_region"),
        ("zonas", "id_zona"),
        ("sucursales", "id_sucursal"),
        ("ventas", "id_venta"),
        ("referidos_asignaciones", "id_asignacion"),
    )
]

#---READSTREAM AND UPSERT FOR EACH TABLE---------

# Process the tables one after another: each stream is started and awaited
# before the next table begins.
for parameter in table_parameters:
    table_name = parameter["name"]

    # Auto Loader stream over the table's landing folder (parquet files); the
    # schema location lives next to the checkpoints.
    df = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "parquet")
        .option("cloudFiles.schemaLocation", f"{CHECKPOINTS_PATH}{table_name}_schema")
        .load(f"{LANDING_PATH}{table_name}")
    )

    # Per-batch handler that upserts into the table's bronze Delta table.
    write_table = write_function(
        f"alex_catalog.bronze.{table_name}",
        f"{BRONZE_PATH}{table_name}",
        parameter["merge_condition"],
    )

    # No sink format is set: foreachBatch replaces the sink, so the handler
    # alone decides how each batch is written.
    # availableNow replaces the deprecated once=True trigger: it still stops
    # after consuming the data available at start, but may split the backlog
    # into several micro-batches.
    (
        df.writeStream
        .foreachBatch(write_table)
        .option("checkpointLocation", f"{CHECKPOINTS_PATH}{table_name}_checkpoint")
        .trigger(availableNow=True)
        .start()
        .awaitTermination()
    )