In [3]:
# ============================================================
# UNIFICAR TABLAS METEO + DEMANDA (DAY & MONTHLY)
#    Unión por geo_limit, year, month
#    Resultado se guarda en capa GOLDEN
# ============================================================

from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Reuse the session already attached to the notebook, or create one.
spark = SparkSession.builder.getOrCreate()

# ============================================================
# LOAD THE SILVER-LAYER TABLES
# ============================================================

# A single reader is enough: no per-table options are configured.
silver_reader = spark.read

# Daily tables (weather, then demand)
df_meteo_day = silver_reader.table(
    "lh_silver.slv_open_meteo_day_cleaned"
)
df_demanda_day = silver_reader.table(
    "lh_silver.slv_redata_demanda_evolucion_day_cleaned"
)

# Monthly tables (weather, then demand)
df_meteo_month = silver_reader.table(
    "lh_silver.slv_open_meteo_monthly_cleaned"
)
df_demanda_month = silver_reader.table(
    "lh_silver.slv_redata_demanda_evolucion_month_cleaned"
)

# ============================================================
# PREPARE THE DATAFRAMES (avoid duplicated columns after the join)
# ============================================================

# Columns removed from the demand side before joining. Per the original
# author's note, geo_name is already supplied by the weather tables, so
# keeping it on both sides would yield a duplicated column.
redundant_demand_columns = ("geo_name",)

df_demanda_day = df_demanda_day.drop(*redundant_demand_columns)
df_demanda_month = df_demanda_month.drop(*redundant_demand_columns)

# ============================================================
# JOIN WEATHER AND DEMAND ON THE KEY COLUMNS
# ============================================================

join_keys = ["geo_limit", "year", "month"]

# Both joins share the same settings: match on the key columns and keep
# only the rows that are present on both sides.
join_settings = {"on": join_keys, "how": "inner"}

# Daily join
# NOTE(review): the daily join uses the same month-level keys as the
# monthly one. If the daily tables hold several rows per
# (geo_limit, year, month), every weather row is paired with every demand
# row of that month -- confirm whether a day-level key is needed here.
df_golden_day = df_meteo_day.join(df_demanda_day, **join_settings)

# Monthly join
df_golden_month = df_meteo_month.join(df_demanda_month, **join_settings)

# ============================================================
# SHOW A SAMPLE OF EACH RESULT (visual sanity check)
# ============================================================

# Each entry pairs the banner to print with the joined frame to preview;
# the daily result is shown first, then the monthly one.
previews = (
    ("✅ Unión diaria completada correctamente:", df_golden_day),
    ("✅ Unión mensual completada correctamente:", df_golden_month),
)

for banner, frame in previews:
    print(banner)
    # Only the first 10 rows are rendered in the notebook widget.
    display(frame.limit(10))

# ============================================================
# PERSIST THE RESULTS IN THE GOLDEN LAYER
# ============================================================

# (joined frame, destination table) pairs, written in this order.
golden_outputs = (
    (df_golden_day, "lh_golden.gld_demanda_diaria"),
    (df_golden_month, "lh_golden.gld_demanda_mensual"),
)

for frame, table_name in golden_outputs:
    # "overwrite" replaces whatever the destination table held before.
    frame.write.mode("overwrite").saveAsTable(table_name)

# Confirmation printed once both writes have returned.
print("✅ Tablas guardadas correctamente en lh_golden:")
print("   - lh_golden.gld_demanda_diaria")
print("   - lh_golden.gld_demanda_mensual")


StatementMeta(, 08cb0fe1-a3b1-45b7-a67a-8d98f0364cc5, 5, Finished, Available, Finished)

✅ Unión diaria completada correctamente:


SynapseWidget(Synapse.DataFrame, 45132dd3-5d7a-4f81-86d6-b93a4044f2a0)

✅ Unión mensual completada correctamente:


SynapseWidget(Synapse.DataFrame, 4e8d6d48-1e97-4baf-8081-7a2cfcdb80d4)

✅ Tablas guardadas correctamente en lh_golden:
   - lh_golden.gld_demanda_diaria
   - lh_golden.gld_demanda_mensual
