In [0]:
# -------IMPORTS-------------------------------------------------------------------------------------------------------------------------
from delta.tables import DeltaTable
from pyspark.sql.functions import col, min as min_

# -------READS----------------------------------------------------------------------------------------------------------------------------------
# Silver-layer inputs, grouped by how the join cell uses them.
# `spark` is the session supplied by the notebook runtime.

# Driving table: every other frame is left-joined onto it.
silver_prospectos_df = spark.table("alex_catalog.silver.prospectos")

# Location lookups, joined in the order sucursal -> zona -> region.
silver_sucursales_df = spark.table("alex_catalog.silver.sucursales")
silver_zonas_df = spark.table("alex_catalog.silver.zonas")
silver_regiones_df = spark.table("alex_catalog.silver.regiones")

# Descriptive lookups keyed by id_origen and id_perfil.
silver_referidos_origenes_df = spark.table("alex_catalog.silver.referidos_origenes")
silver_distribuidores_perfiles_df = spark.table("alex_catalog.silver.distribuidores_perfiles")

# Assignment rows (keyed by id_referido) and the users they point to.
silver_referidos_asignaciones_df = spark.table("alex_catalog.silver.referidos_asignaciones")
silver_afiliacion_usuarios_df = spark.table("alex_catalog.silver.afiliacion_usuarios")

# Distributors (keyed by id_prospecto) and their sales, used for the first-sale date.
silver_distribuidores_df = spark.table("alex_catalog.silver.distribuidores")
silver_ventas_df = spark.table("alex_catalog.silver.ventas")



In [0]:
# -------JOINS----------------------------------------------------------------------------------------------------------------------------

# Smallest fechaVenta per distributor; surfaced later as FECHA_ACTIVACION.
agg_ventas_df = (
    silver_ventas_df
    .groupBy("id_distribuidor")
    .agg(min_("fechaVenta").alias("FechaActivacion"))
)

# Every lookup is trimmed to its join key plus the columns carried forward,
# then left-joined so that prospects without a match are still kept.
#
# NOTE(review): if referidos_asignaciones has several rows per id_referido, or
# distribuidores several rows per id_prospecto, these joins multiply prospect
# rows. The write step merges on ID, so confirm both keys are unique.
gold_df = (
    silver_prospectos_df
    # sucursal -> zona -> region: each hop supplies the key for the next one.
    .join(
        silver_sucursales_df.select("id_sucursal", "descripcionSucursal", "id_zona"),
        "id_sucursal",
        "left",
    )
    .join(
        silver_zonas_df.select("id_zona", "descripcionZona", "id_region"),
        "id_zona",
        "left",
    )
    .join(
        silver_regiones_df.select("id_region", "descripcionRegion"),
        "id_region",
        "left",
    )
    # Origin and profile descriptions.
    .join(
        silver_referidos_origenes_df.select("id_origen", "descripcionOrigen"),
        "id_origen",
        "left",
    )
    .join(
        silver_distribuidores_perfiles_df.select("id_perfil", "descripcionPerfil"),
        "id_perfil",
        "left",
    )
    # Assignment row, then the user it references.
    .join(
        silver_referidos_asignaciones_df.select(
            "id_referido", "fechaAsignacion", "id_usuarioAfiliacion"
        ),
        "id_referido",
        "left",
    )
    .join(
        silver_afiliacion_usuarios_df.select("id_usuarioAfiliacion", "nombreUsuario"),
        "id_usuarioAfiliacion",
        "left",
    )
    # Distributor linked to the prospect, then that distributor's first sale.
    .join(
        silver_distribuidores_df.select("id_distribuidor", "id_prospecto"),
        "id_prospecto",
        "left",
    )
    .join(agg_ventas_df, "id_distribuidor", "left")
)

# Final projection: (source column, output column) pairs, in output order.
renamed_gold_df = gold_df.select(
    *(
        col(source_name).alias(output_name)
        for source_name, output_name in (
            ("id_prospecto", "ID"),
            ("NombreCompleto", "NOMBRE_PROSPECTO"),
            ("descripcionOrigen", "ORIGEN"),
            ("descripcionPerfil", "PERFIL"),
            ("descripcionSucursal", "SUCURSAL"),
            ("descripcionZona", "ZONA"),
            ("descripcionRegion", "REGION"),
            ("nombreUsuario", "PROMOTOR"),
            ("fechaAsignacion", "FECHA_ASIGNACION"),
            ("created_at", "FECHA_CAPTURA"),
            ("FechaActivacion", "FECHA_ACTIVACION"),
        )
    )
)


In [0]:
# --UPSERT FUNCTION----------------------------------------------------------------------------------------------------------------

def upsert_function(df, catalog_path, adls_path, merge_condition, partition_columns=None):
    """Merge ``df`` into a Delta table, creating the table on first use.

    If ``catalog_path`` exists in the catalog and ``adls_path`` is a Delta
    table, ``df`` is merged into it: rows satisfying ``merge_condition`` have
    all their columns updated and the remaining source rows are inserted.
    Otherwise ``df`` is written in overwrite mode as a Delta table located at
    ``adls_path`` and saved under the name ``catalog_path``.

    Args:
        df: Source DataFrame to upsert.
        catalog_path: Name of the target table in the catalog.
        adls_path: Storage path of the target Delta table.
        merge_condition: Merge predicate. The target table is aliased
            ``target`` and ``df`` is aliased ``source``.
        partition_columns: Optional column name, or sequence of column names,
            to partition the table by when it is created. It is not used when
            merging into an existing table.

    Returns:
        None.
    """
    table_is_mergeable = (
        spark.catalog.tableExists(catalog_path)
        and DeltaTable.isDeltaTable(spark, adls_path)
    )

    if table_is_mergeable:
        # NOTE(review): Delta is documented to reject a merge in which several
        # source rows match the same target row, so `df` should be unique on
        # the columns used in `merge_condition`.
        (
            DeltaTable.forName(spark, catalog_path)
            .alias("target")
            .merge(df.alias("source"), merge_condition)
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute()
        )
        return

    # A bare string would otherwise be unpacked character by character below.
    if isinstance(partition_columns, str):
        partition_columns = [partition_columns]

    writer = df.write.format("delta").mode("overwrite")
    if partition_columns:
        writer = writer.partitionBy(*partition_columns)
    writer.option("path", adls_path).saveAsTable(catalog_path)

In [0]:
# -------WRITE ---------------------------------------------------------------------------------------
# Upsert the projected gold frame into the gold table, matching rows on ID.
upsert_function(
    df=renamed_gold_df,
    catalog_path="alex_catalog.gold.Prospectos_Activaciones",
    adls_path="abfss://goldcontainer@rohegastorage.dfs.core.windows.net/pipeline01/Prospectos_Activaciones",
    merge_condition="target.ID = source.ID",
)