In [8]:
import os, re, unicodedata
from datetime import datetime
from pyspark.sql import SparkSession, functions as F
from notebookutils import mssparkutils

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 3, Finished, Available, Finished)

In [9]:
# ---------- Spark ----------
# Obtain the SparkSession used by the cells below; getOrCreate() returns the
# already-active session when there is one instead of building a second one.
spark = SparkSession.builder.getOrCreate()

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 4, Finished, Available, Finished)

In [10]:
# Select the target Lakehouse (bronze): unqualified table names used later in
# this notebook (e.g. in saveAsTable) resolve against lh_bronze.
spark.sql("USE lh_bronze")

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 5, Finished, Available, Finished)

DataFrame[]

In [11]:
# 📂 Bronze folders (where the JSON files already are)
# Lakehouse-relative path, the form handed to mssparkutils.fs.ls below.
bronze_base_logical = "Files/bronze/ESIOS/data/demanda/evolucion"
# Same folder expressed under the /lakehouse/default prefix; derived from the
# logical path so the two values cannot drift apart.
bronze_base_physical = "/lakehouse/default/" + bronze_base_logical

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 6, Finished, Available, Finished)

In [12]:
# 📂 Output location (Tables/)
# NOTE(review): no other cell visible in this notebook reads tables_prefix;
# confirm whether it is still needed.
tables_prefix = "Tables"

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 7, Finished, Available, Finished)

In [13]:
# --------------------------
# Utility: identifier slug
# --------------------------
def slugify(name: str) -> str:
    """Turn an arbitrary label into a lowercase ASCII identifier fragment.

    Accented characters are reduced to their base letter, the text is
    lower-cased, and every character outside ``[a-z0-9_]`` becomes ``_``.
    Consecutive or trailing underscores are intentionally left as they are so
    that names generated by earlier runs stay the same.

    Args:
        name: Label to convert. Non-string values (for example a numeric
            indicator id) are converted with ``str()`` first.

    Returns:
        The slug; an empty string when ``name`` is empty.
    """
    # str() keeps numeric ids usable here: unicodedata.normalize() raises
    # TypeError for anything that is not a str.
    text = unicodedata.normalize("NFKD", str(name))
    # NFKD splits accented letters into base letter + combining mark; dropping
    # every non-ASCII code point then removes the marks.
    text = text.encode("ascii", "ignore").decode("ascii")
    # Replace anything that is not a lowercase letter, digit or underscore.
    return re.sub(r"[^a-z0-9_]", "_", text.lower())

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 8, Finished, Available, Finished)

In [14]:
# --------------------------
# Processing
# --------------------------
# Walks <bronze_base_logical>/<granularity>/<year>/ and, for every *.json file,
# explodes `indicator.values` into rows, tags them with indicator / year /
# granularity metadata and appends them to a managed Delta table in the
# current database.
#
# NOTE(review): the write below uses mode("append"), so re-running this cell
# over files that were already loaded appends the same rows again. Confirm
# whether reruns need an overwrite or dedup strategy.
total_rows = 0

# Granularity folders under the bronze base (the original note lists: day, month)
granularities = [g.name for g in mssparkutils.fs.ls(bronze_base_logical) if g.isDir]

for trunc in granularities:
    for year_dir in mssparkutils.fs.ls(f"{bronze_base_logical}/{trunc}"):
        year = year_dir.name
        # The folder name becomes the integer `year` column, so anything that
        # is not an all-digit directory is reported and skipped instead of
        # letting int() raise halfway through the load.
        if not (year_dir.isDir and year.isdecimal()):
            print(f"⚠️ Entrada ignorada (no es una carpeta de año): {year_dir.path}")
            continue
        year_number = int(year)

        for file_info in mssparkutils.fs.ls(year_dir.path):
            if not file_info.path.endswith(".json"):
                continue

            print(f"\n🔎 Procesando {file_info.path}")

            # multiline: a JSON record in the file may span several lines
            df_raw = spark.read.option("multiline", True).json(file_info.path)

            # Fetch the indicator metadata with a single Spark action.
            # short_name is only selected when the inferred schema has it.
            indicator_fields = df_raw.schema["indicator"].dataType.fieldNames()
            meta_cols = ["indicator.id"]
            if "short_name" in indicator_fields:
                meta_cols.append("indicator.short_name")
            meta = df_raw.select(*meta_cols).first()
            indicator_id = meta[0]
            short_name = meta[1] if len(meta_cols) > 1 else None

            # Fall back to the id when short_name is missing, null or empty.
            # str() guarantees slugify() and the indicator_type column always
            # receive text, even when the fallback id is numeric.
            indicator_type = str(short_name or indicator_id)

            # One output row per element of indicator.values
            values = (df_raw
                      .selectExpr("explode(indicator.values) as val")
                      .select("val.*"))

            # Count once up front: the same number serves as the emptiness
            # check and as the figure reported after the write, so the file is
            # not scanned again afterwards.
            row_count = values.count()
            if row_count == 0:
                print("⚠️ JSON vacío (sin values), skipping")
                continue

            # Enrich with metadata
            df = (values
                  .withColumn("indicator_id", F.lit(indicator_id))
                  .withColumn("indicator_type", F.lit(indicator_type))
                  .withColumn("year", F.lit(year_number))
                  .withColumn("time_trunc", F.lit(trunc))
                 )

            # Table name is derived from the indicator type and the granularity
            indicator_slug = slugify(indicator_type)
            table_name = f"brz_esios_demanda_{indicator_slug}_{trunc}"

            # Append to a managed Delta table in the current database
            # (lh_bronze, selected earlier in this notebook)
            df.write.format("delta").mode("append").saveAsTable(table_name)

            total_rows += row_count
            print(f"✅ Guardado en tabla {table_name} ({row_count} registros)")

print(f"\nProceso terminado - {datetime.utcnow().isoformat()}Z")
print(f"Total registros procesados: {total_rows}")

StatementMeta(, 4a96c6ee-d8b6-49d5-81b3-93048ea88698, 9, Finished, Available, Finished)


🔎 Procesando abfss://ecf938c4-c449-48de-a07c-1d968a72b3d1@onelake.dfs.fabric.microsoft.com/0fd09a67-0164-4fb6-838e-02a27c823afc/Files/bronze/ESIOS/data/demanda/evolucion/month/2023/brz-demanda-month-2023.json


✅ Guardado en tabla brz_esios_demanda_demanda_b_c__month (228 registros)

🔎 Procesando abfss://ecf938c4-c449-48de-a07c-1d968a72b3d1@onelake.dfs.fabric.microsoft.com/0fd09a67-0164-4fb6-838e-02a27c823afc/Files/bronze/ESIOS/data/demanda/evolucion/month/2024/brz-demanda-month-2024.json


✅ Guardado en tabla brz_esios_demanda_demanda_b_c__month (228 registros)

🔎 Procesando abfss://ecf938c4-c449-48de-a07c-1d968a72b3d1@onelake.dfs.fabric.microsoft.com/0fd09a67-0164-4fb6-838e-02a27c823afc/Files/bronze/ESIOS/data/demanda/evolucion/month/2025/brz-demanda-month-2025.json


✅ Guardado en tabla brz_esios_demanda_demanda_b_c__month (114 registros)

Proceso terminado - 2025-10-06T12:28:55.800396Z
Total registros procesados: 570
