# CARGA PROVEEDORES A BRONZE

In [0]:
import re
from pyspark.sql.functions import col

ruta_volumen_prov = "/Volumes/workspace/bronze/uploads/Proveedores.csv"

# 1. Read the CSV (reusing the accent/encoding configuration that already
#    worked for us): first row is the header, fields are ';'-separated and
#    the file is decoded as ISO-8859-1.
df_raw_prov = (spark.read.format("csv")
    .option("header", "true")
    .option("sep", ";")
    .option("encoding", "ISO-8859-1")
    .load(ruta_volumen_prov))

# 2. Drop ghost columns (_c15, etc.) BEFORE normalising the names.
#    The normalisation below strips leading underscores, so "_c15" would
#    become "c15" and a later "_c" prefix check could never match it.
#    Match the exact "_c<digits>" shape so that a genuine header which merely
#    starts with "_c" is not discarded.
ghost_cols_prov = [c for c in df_raw_prov.columns if re.fullmatch(r"_c\d+", c)]
if ghost_cols_prov:
    df_raw_prov = df_raw_prov.drop(*ghost_cols_prov)

# 3. Clean the column names so Delta Lake accepts them: lowercase, replace
#    the accented vowels with their plain form, collapse every run of
#    characters outside [a-z0-9] into a single '_' and trim '_' at both ends.
accent_map = str.maketrans("áéíóú", "aeiou")
cols_prov = [
    re.sub(r'[^a-z0-9]+', '_', name.lower().translate(accent_map)).strip('_')
    for name in df_raw_prov.columns
]
# toDF renames positionally, so every column is renamed in a single step.
df_raw_prov = df_raw_prov.toDF(*cols_prov)
df_final_bronze_prov = df_raw_prov

# 4. Save to Bronze, replacing both the data and the schema of any existing table.
df_final_bronze_prov.write.mode("overwrite").option("overwriteSchema", "true").saveAsTable("workspace.bronze.proveedores")

print("✅ Tabla Proveedores en Bronze creada.")