In [0]:
# Importações das funções na camada bronze
from pyspark.sql.functions import col, current_timestamp

In [0]:
# Notebook parameters, exposed as Databricks text widgets (each defaults to "").
#
# NOTE: the former dbutils.widgets.removeAll() call was dropped on purpose.
# Databricks documents that a widget cannot be created in the same cell that
# removes widgets, so "remove everything, then recreate" in a single cell is
# unreliable. Declaring a text widget that already exists is harmless, so the
# cell stays safe to re-run without the cleanup step.
dbutils.widgets.text("folder", "")
dbutils.widgets.text("table_name", "")
dbutils.widgets.text("format", "")
dbutils.widgets.text("catalog", "")
dbutils.widgets.text("schema", "")

# Read the current widget values. The "format" widget is bound to
# `file_format` so the Python builtin `format` is not shadowed.
folder = dbutils.widgets.get("folder")
table_name = dbutils.widgets.get("table_name")
file_format = dbutils.widgets.get("format")
catalog = dbutils.widgets.get("catalog")
schema = dbutils.widgets.get("schema")

# Fail fast: every parameter is required. Without this check an unset widget
# only surfaces later as a hard-to-read Spark error (empty format/path, or an
# invalid table identifier such as "..").
_missing_params = [
    _widget_name
    for _widget_name, _widget_value in (
        ("folder", folder),
        ("table_name", table_name),
        ("format", file_format),
        ("catalog", catalog),
        ("schema", schema),
    )
    if not _widget_value.strip()
]
if _missing_params:
    raise ValueError(
        "Missing required notebook parameter(s): " + ", ".join(_missing_params)
    )

In [0]:
# Bronze ingestion: read the source files, filter on "NC", add control
# columns and persist the result as a Delta table.

# Load the files under `folder` with the reader format chosen in the widget.
source_reader = spark.read.format(file_format).option("multiline", True)
raw_df = source_reader.load(folder)

# Keep only rows whose "NC" value differs from the label below (presumably a
# header row repeated inside the data - confirm against the source files).
# NOTE(review): `!=` evaluates to NULL when "NC" is NULL, so those rows are
# filtered out as well - confirm this is intended.
is_data_row = col("NC") != "Nível Territorial (Código)"
data_rows_df = raw_df.filter(is_data_row)

# Control columns: originating file path and the ingestion timestamp.
with_source_df = data_rows_df.withColumn("data_source", col("_metadata.file_path"))
df = with_source_df.withColumn("ingestion_ts", current_timestamp())

# Overwrite the target Delta table (schema merging enabled).
delta_writer = df.write.format("delta").mode("overwrite").option("mergeSchema", "true")
delta_writer.saveAsTable(f"{catalog}.{schema}.{table_name}")

print(f"Table {table_name} created at {catalog}.{schema}.{table_name}")