## Pipeline: Silver to Gold

This pipeline consumes trusted Silver data and applies business logic, aggregations, and metric calculations to create optimized Gold datasets designed for analytics, dashboards, and executive reporting.

In [7]:
%%configure
{
    "driverCores": 4,
    "driverMemory": "28g",
    "executorCores": 4,
    "executorMemory": "28g",
    "numExecutors": 1 
}

StatementMeta(, , -1, Finished, , Finished)

MagicUsageError: The current running Livy session must be restarted for the config changes to take effect. Specify the argument "-f" to force restarting Livy session. Any variables stored in memory will be cleared.

In [50]:
from pyspark.sql.functions import col, sum, count
from pyspark.sql.window import Window

StatementMeta(sparkpool1, 32, 47, Finished, Available, Finished)

In [None]:
# Path variables
silver_path = "abfss://silver@lablicitacoessa.dfs.core.windows.net/Licitacoes_Gov/"
gold_path = "abfss://gold@lablicitacoessa.dfs.core.windows.net/Licitacoes_Gov/"

StatementMeta(sparkpool1, 32, 48, Finished, Available, Finished)

In [None]:
df_silver = spark.read.format("delta").load(silver_path)

StatementMeta(sparkpool1, 32, 49, Finished, Available, Finished)

In [53]:
df_cols = df_silver.select(
    col('COD_MUNICIPIO_UASG'),
    col('DAT_ABERTURA_PROPOSTA'),
    col('DAT_ENTREGA_EDITAL'),
    col('DAT_ENTREGA_PROPOSTA'),
    col('DAT_PUBLICACAO'),
    col('DAT_ALTERACAO'),
    col('END_ENTREGA_EDITAL'),
    col('DSC_FUNCAO_RESPONSAVEL'),
    col('COD_COMPRA'),
    col('DSC_IDENTIFICADOR'),
    col('DSC_INFORMACOES_GERAIS'),
    col('NUM_MODALIDADE'),
    col('NOM_MODALIDADE'),
    col('NOM_RESPONSAVEL'),
    col('NUM_AVISOS'),
    col('NUM_PROCESSO'),
    col('DSC_OBJETO'),
    col('IDT_PERTENCE_14133'),
    col('DSC_SITUACAO_AVISO'),
    col('DSC_TIPO_PREGAO'),
    col('DSC_TIPO_RECURSO'),
    col('COD_UASG'),
    col('VAL_ESTIMADO_TOTAL')
)


StatementMeta(sparkpool1, 32, 50, Finished, Available, Finished)

In [54]:
df_with_rules = df_cols.filter(col('IDT_PERTENCE_14133') == True)

StatementMeta(sparkpool1, 32, 51, Finished, Available, Finished)

In [55]:
window = Window.partitionBy("COD_UASG", "COD_MUNICIPIO_UASG")

df_refined_agg = df_with_rules \
    .withColumn('QTD_LICITACOES_UASG', count('COD_COMPRA').over(window)) \
    .withColumn('VAL_TOTAL_UASG', sum('VAL_ESTIMADO_TOTAL').over(window))

display(df_refined_agg)

StatementMeta(sparkpool1, 32, 52, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 795b9d0a-2062-4e56-8bfd-c9f59a8ee8fe)

In [56]:
spark.sql("CREATE DATABASE IF NOT EXISTS refined")

StatementMeta(sparkpool1, 32, 53, Finished, Available, Finished)

DataFrame[]

In [57]:
(
    df_refined_agg.write
        .format("delta")
        .mode("overwrite") 
        .option("overwriteSchema", "true") 
        .option("path", gold_path)
        .saveAsTable("refined.REF_LICITACOES")
)

StatementMeta(sparkpool1, 32, 54, Finished, Available, Finished)