In [0]:
from pyspark.sql import functions as F

CATALOG = "workspace"
SCHEMA = "default"

# 1) Map of RAW (source) tables: short dataset key -> fully qualified table name.
# Each value is "<CATALOG>.<SCHEMA>.<table>". Note that the "translation" entry
# does not follow the "raw_olist_" prefix used by the other tables.
RAW = {
    key: f"{CATALOG}.{SCHEMA}.{table}"
    for key, table in (
        ("customers", "raw_olist_customers"),
        ("orders", "raw_olist_orders"),
        ("order_items", "raw_olist_order_items"),
        ("products", "raw_olist_products"),
        ("sellers", "raw_olist_sellers"),
        ("payments", "raw_olist_order_payments"),
        ("reviews", "raw_olist_order_reviews"),
        ("geolocation", "raw_olist_geolocation"),
        ("translation", "raw_product_category_name_translation"),
    )
}

# 2) Standard function: RAW -> BRONZE (Delta) + ingestion timestamp
def raw_to_bronze(raw_table: str, bronze_table: str):
    """Copy a RAW table into a BRONZE Delta table, stamping the ingestion time.

    Reads ``raw_table`` through ``spark.table``, adds an ``ingested_at``
    column set to ``F.current_timestamp()``, and saves the result as
    ``bronze_table`` in Delta format using write mode ``"overwrite"``.

    Relies on a ``spark`` session being available in the enclosing scope;
    it is not defined in this cell (presumably injected by the notebook
    runtime -- confirm).

    Args:
        raw_table: Name of the source table to read.
        bronze_table: Name of the destination table to write.
    """
    source = spark.table(raw_table)
    stamped = source.withColumn("ingested_at", F.current_timestamp())

    writer = stamped.write.format("delta").mode("overwrite")
    writer.saveAsTable(bronze_table)

# 3) Run the RAW -> BRONZE copy for every table in the RAW map
for name, raw_table in RAW.items():
    # The bronze table lives in the same catalog/schema and is named after
    # the short key of the RAW map (not after the raw table name).
    bronze_table = f"{CATALOG}.{SCHEMA}.bronze_olist_{name}"
    raw_to_bronze(raw_table, bronze_table)
    # Reached only if raw_to_bronze returned without raising.
    print(f"OK: {raw_table} -> {bronze_table}")



OK: workspace.default.raw_olist_customers -> workspace.default.bronze_olist_customers
OK: workspace.default.raw_olist_orders -> workspace.default.bronze_olist_orders
OK: workspace.default.raw_olist_order_items -> workspace.default.bronze_olist_order_items
OK: workspace.default.raw_olist_products -> workspace.default.bronze_olist_products
OK: workspace.default.raw_olist_sellers -> workspace.default.bronze_olist_sellers
OK: workspace.default.raw_olist_order_payments -> workspace.default.bronze_olist_payments
OK: workspace.default.raw_olist_order_reviews -> workspace.default.bronze_olist_reviews
OK: workspace.default.raw_olist_geolocation -> workspace.default.bronze_olist_geolocation
OK: workspace.default.raw_product_category_name_translation -> workspace.default.bronze_olist_translation
