In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F, Window
from pyspark.sql.types import (
    StructType, StructField,
    StringType, IntegerType, DoubleType, TimestampType, DateType, LongType, BooleanType
)
from pyspark.sql.utils import AnalysisException

from delta import configure_spark_with_delta_pip
from delta.tables import DeltaTable

# =========
# MINIO / S3A
# =========
MINIO_ENDPOINT = "http://minio:9000"
MINIO_ACCESS_KEY = "minioadmin"
MINIO_SECRET_KEY = "minioadmin123"
MINIO_BUCKET = "spark"

BASE   = f"s3a://{MINIO_BUCKET}/medallion"
BRONZE = f"{BASE}/bronze"
SILVER = f"{BASE}/silver"
GOLD   = f"{BASE}/gold"
STATE  = f"{BASE}/_state"

JDBC_URL = (
    "jdbc:sqlserver://sqlserver:1433;"
    "databaseName=smartpool;"
    "encrypt=true;"
    "trustServerCertificate=true;"
)
JDBC_USER = "sa"
JDBC_PASS = "Password1234%"
JDBC_DRIVER = "com.microsoft.sqlserver.jdbc.SQLServerDriver"

# =========
# PACKAGES (Ivy/Maven)
# =========

EXTRA_JARS = ",".join([
    "/opt/spark/jars/hadoop-aws-3.4.0.jar",
    "/opt/spark/jars/hadoop-common-3.4.0.jar",
    "/opt/spark/jars/aws-java-sdk-bundle-2.23.19.jar",
    "/opt/spark/jars/mssql-jdbc-12.10.2.jre11.jar",
    "/opt/spark/jars/delta-spark_2.13-4.0.0.jar",
    "/opt/spark/jars/delta-storage-4.0.0.jar",
    "/opt/spark/jars/antlr4-runtime-4.13.1.jar",
    "/opt/spark/jars/spark-sql-kafka-0-10_2.13-4.0.1.jar",
    # "/opt/spark/jars/spark-token-provider-kafka-0-10_2.13-4.0.1.jar",
    "/opt/spark/jars/kafka-clients-3.9.1.jar",
])

SPARK_PACKAGES = ",".join([
    # Kafka (necesitas LOS DOS para evitar KafkaConfigUpdater)
    # "org.apache.spark:spark-sql-kafka-0-10_2.13:4.0.1",
    "org.apache.spark:spark-token-provider-kafka-0-10_2.13:4.0.1",

    # Delta
    # "io.delta:delta-spark_2.13:4.0.0",

    # S3A / AWS SDK (elige un combo coherente; este suele ir bien)
    # "org.apache.hadoop:hadoop-aws:3.4.1",
    # "software.amazon.awssdk:bundle:2.24.6",

    # SQLServer JDBC (opcional; si prefieres local jar, quítalo de aquí)
    # "com.microsoft.sqlserver:mssql-jdbc:12.10.2.jre11",
])

spark = (
    SparkSession.builder
    .appName("smartpool-kafka-sensors")
    .master("spark://spark-master:7077")
    .config("spark.submit.deployMode", "client")  # importante en notebooks

    .config("spark.sql.shuffle.partitions", "6")
    .config("spark.sql.adaptive.enabled", "false")
    .config("spark.sql.session.timeZone", "Europe/Madrid")
    .config("spark.streaming.stopGracefullyOnShutdown", "true")
    .config("spark.databricks.delta.schema.autoMerge.enabled", "true")

    # Delta
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

    # MinIO / S3A
    .config("spark.hadoop.fs.s3a.endpoint", MINIO_ENDPOINT)
    .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
    .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")

    # Dependencias por Ivy
    .config("spark.jars.packages", SPARK_PACKAGES)
    .config("spark.jars", EXTRA_JARS)
    
    .getOrCreate()
)

# spark = configure_spark_with_delta_pip(builder).getOrCreate()
spark.sparkContext.setLogLevel("WARN")


:: loading settings :: url = jar:file:/opt/conda/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.3.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /home/jovyan/.ivy2.5.2/cache
The jars for the packages stored in: /home/jovyan/.ivy2.5.2/jars
org.apache.spark#spark-token-provider-kafka-0-10_2.13 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-8c7b02a5-ab43-4ff8-9369-92694265820c;1.0
	confs: [default]
	found org.apache.spark#spark-token-provider-kafka-0-10_2.13;4.0.1 in central
	found org.apache.kafka#kafka-clients;3.9.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.10.7 in central
	found org.slf4j#slf4j-api;2.0.16 in central
	found org.apache.hadoop#hadoop-client-runtime;3.4.1 in central
	found org.apache.hadoop#hadoop-client-api;3.4.1 in central
	found com.google.code.findbugs#jsr305;3.0.0 in central
:: resolution report :: resolve 237ms :: artifacts dl 6ms
	:: modules in

In [2]:
# from smartpool_config import *

# spark = create_spark("smartpool-kafka-sensors")

print("Spark OK:", spark.version)
print("BASE:", BASE)
print("BRONZE:", BRONZE)
print("SILVER:", SILVER)
print("GOLD:", GOLD)
print("STATE:", STATE)
print("JDBC:", JDBC_URL)


Spark OK: 4.0.1
BASE: s3a://spark/medallion
BRONZE: s3a://spark/medallion/bronze
SILVER: s3a://spark/medallion/silver
GOLD: s3a://spark/medallion/gold
STATE: s3a://spark/medallion/_state
JDBC: jdbc:sqlserver://sqlserver:1433;databaseName=smartpool;encrypt=true;trustServerCertificate=true;


In [3]:
# Kafka
KAFKA_BOOTSTRAP = "192.168.1.40:9094"
TOPIC = "smartpool-sensors"

# Medallion paths
BRONZE_KAFKA = f"{BRONZE}/smartpool_sensors_kafka"
SILVER_SENS  = f"{SILVER}/smartpool_sensors"
GOLD_AGG     = f"{GOLD}/smartpool_sensors_1m"
GOLD_ENR     = f"{GOLD}/smartpool_sensors_enriched"

# Checkpoints
CHK_BASE   = f"{BASE}/_checkpoints"
BRONZE_CHK = f"{CHK_BASE}/bronze_smartpool_sensors_kafka"
SILVER_CHK = f"{CHK_BASE}/silver_smartpool_sensors"
GOLD_AGG_CHK = f"{CHK_BASE}/gold_smartpool_sensors_1m"
GOLD_ENR_CHK = f"{CHK_BASE}/gold_smartpool_sensors_enriched"

# SILVER
SILVER_POOLS   = f"{SILVER}/pools_dim"
SILVER_EVENTS  = f"{SILVER}/maintenance_events"
SILVER_ELEC    = f"{SILVER}/electricity_prices"

# GOLD
GOLD_ELEC_DAILY = f"{GOLD}/electricity_daily_stats"
GOLD_ELEC_PEAK  = f"{GOLD}/electricity_peak_hours"
GOLD_EVENTS_ENR = f"{GOLD}/maintenance_events_enriched"
GOLD_EVENTS_COST= f"{GOLD}/maintenance_events_cost"



In [4]:
def delta_exists(path: str) -> bool:
    try:
        return DeltaTable.isDeltaTable(spark, path)
    except Exception:
        return False

def ensure_delta(path: str, schema: StructType, partition_cols=None):
    if delta_exists(path):
        return
    w = (spark.createDataFrame([], schema).write.format("delta").mode("overwrite").option("overwriteSchema", "true"))
    if partition_cols:
        w = w.partitionBy(*partition_cols)
    w.save(path)
    print("Creada Delta vacía:", path)

In [5]:
raw = (
    spark.readStream
        .format("kafka")
        .option("kafka.bootstrap.servers", KAFKA_BOOTSTRAP)
        .option("subscribe", TOPIC)
        .option("startingOffsets", "latest") #DEBUG
        .option("failOnDataLoss", "false")
        .option("maxOffsetsPerTrigger", "2000")     # evita picos
        .load()
)

bronze_schema = StructType([
    StructField("topic", StringType(), True),
    StructField("partition", IntegerType(), True),
    StructField("offset", LongType(), True),
    StructField("kafka_ts", TimestampType(), True),
    StructField("kafka_key", StringType(), True),
    StructField("kafka_value", StringType(), True),
    StructField("ingest_ts", TimestampType(), True),
    StructField("ingest_date", DateType(), True),
])

ensure_delta(BRONZE_KAFKA, bronze_schema, partition_cols=["ingest_date"])

bronze = (
    raw.select(
        F.col("topic"),
        F.col("partition"),
        F.col("offset"),
        F.col("timestamp").alias("kafka_ts"),
        F.col("key").cast("string").alias("kafka_key"),
        F.col("value").cast("string").alias("kafka_value"),
        F.current_timestamp().alias("ingest_ts"),
        F.to_date(F.current_timestamp()).alias("ingest_date"),
    )
)

q_bronze = (
    bronze.writeStream
        .format("delta")
        .outputMode("append")
        .option("path", BRONZE_KAFKA)
        .option("checkpointLocation", BRONZE_CHK)
        .partitionBy("ingest_date")
        .trigger(processingTime="10 seconds")
        .queryName("bronze_smartpool_sensors_kafka")
        .start()
)

print("BRONZE streaming ->", BRONZE_KAFKA)

26/01/26 22:45:45 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
                                                                                

Creada Delta vacía: s3a://spark/medallion/bronze/smartpool_sensors_kafka
BRONZE streaming -> s3a://spark/medallion/bronze/smartpool_sensors_kafka


In [6]:
sensor_schema = StructType([
    StructField("pool_id", IntegerType(), True),
    StructField("ts", StringType(), True),  # viene ISO ms en el producer
    StructField("ph", DoubleType(), True),
    StructField("chlorine_mg_l", DoubleType(), True),
    StructField("temp_c", DoubleType(), True),
    StructField("turbidity_ntu", DoubleType(), True),
    StructField("water_level_pct", DoubleType(), True),
    StructField("pump_kwh_est", DoubleType(), True),  # opcional
])

silver_schema = StructType([
    StructField("pool_id", IntegerType(), True),
    StructField("sensor_ts", TimestampType(), True),
    StructField("ph", DoubleType(), True),
    StructField("chlorine_mg_l", DoubleType(), True),
    StructField("temp_c", DoubleType(), True),
    StructField("turbidity_ntu", DoubleType(), True),
    StructField("water_level_pct", DoubleType(), True),
    StructField("pump_kwh_est", DoubleType(), True),
    StructField("ingest_date", DateType(), True),
    StructField("silver_ingest_ts", TimestampType(), True),
])

ensure_delta(SILVER_SENS, silver_schema, partition_cols=["ingest_date"])

bronze_stream = spark.readStream.format("delta").load(BRONZE_KAFKA)

parsed = (
    bronze_stream
        .select("kafka_value", "ingest_date")
        .withColumn("json", F.from_json(F.col("kafka_value"), sensor_schema))
        .select(
            F.col("json.pool_id").alias("pool_id"),
            F.col("json.ts").alias("ts"),
            F.col("json.ph").alias("ph"),
            F.col("json.chlorine_mg_l").alias("chlorine_mg_l"),
            F.col("json.temp_c").alias("temp_c"),
            F.col("json.turbidity_ntu").alias("turbidity_ntu"),
            F.col("json.water_level_pct").alias("water_level_pct"),
            F.col("json.pump_kwh_est").alias("pump_kwh_est"),
            F.col("ingest_date"),
        )
        # parse timestamp ISO: "2026-01-25T19:07:21.820Z"
        # .withColumn("sensor_ts", F.to_timestamp(F.regexp_replace("ts", "Z$", ""), "yyyy-MM-dd'T'HH:mm:ss.SSS"))
        .withColumn(
            "sensor_ts",
            F.coalesce(
                F.to_timestamp(F.col("ts"), "yyyy-MM-dd'T'HH:mm:ss.SSSX"),     # 2026-01-25T22:38:45.553Z
                F.to_timestamp(F.col("ts"), "yyyy-MM-dd'T'HH:mm:ss.SSSSSSX"),  # 6 decimales + Z
                F.to_timestamp(F.col("ts"), "yyyy-MM-dd'T'HH:mm:ssX"),         # sin decimales + Z
                F.to_timestamp(F.regexp_replace("ts", "Z$", ""), "yyyy-MM-dd'T'HH:mm:ss.SSS"),    # fallback viejo
                F.to_timestamp(F.regexp_replace("ts", "Z$", ""), "yyyy-MM-dd'T'HH:mm:ss.SSSSSS")  # fallback 6 decimales
            )
        )
        .drop("ts")
        .withColumn("silver_ingest_ts", F.current_timestamp())
)

# Reglas básicas de calidad (ajusta si quieres)
silver = (parsed
    .filter(F.col("pool_id").isNotNull())
    .filter(F.col("sensor_ts").isNotNull())
    .filter(F.col("ph").between(0.0, 14.0))
    .filter(F.col("chlorine_mg_l").between(0.0, 10.0))
    .filter(F.col("temp_c").between(-5.0, 60.0))
    .filter(F.col("turbidity_ntu").between(0.0, 200.0))
    .filter(F.col("water_level_pct").between(0.0, 100.0))
)

q_silver = (
    silver.writeStream
        .format("delta")
        .outputMode("append")
        .option("path", SILVER_SENS)
        .option("checkpointLocation", SILVER_CHK)
        .partitionBy("ingest_date")
        .trigger(processingTime="10 seconds")
        .queryName("silver_smartpool_sensors")
        .start()
)

print("SILVER streaming ->", SILVER_SENS)


Creada Delta vacía: s3a://spark/medallion/silver/smartpool_sensors


26/01/26 22:46:02 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


SILVER streaming -> s3a://spark/medallion/silver/smartpool_sensors


In [7]:
silver_stream = spark.readStream.format("delta").load(SILVER_SENS)

# umbrales “smartpool” (puedes alinear con tu propuesta)
PH_MIN, PH_MAX = 7.1, 7.8
CL_MIN, CL_MAX = 0.4, 1.5

gold_agg = (
    silver_stream
        # .withWatermark("sensor_ts", "2 minutes")
        .withWatermark("sensor_ts", "10 seconds") #DEBUG
        .groupBy(
            F.window(F.col("sensor_ts"), "1 minute").alias("w"),
            F.col("pool_id")
        )
        .agg(
            F.avg("ph").alias("ph_avg"),
            F.avg("chlorine_mg_l").alias("chlorine_avg"),
            F.avg("temp_c").alias("temp_avg"),
            F.max("turbidity_ntu").alias("turbidity_max"),
            F.avg("water_level_pct").alias("water_level_avg"),
            F.sum(F.coalesce("pump_kwh_est", F.lit(0.0))).alias("pump_kwh_sum"),
            F.count("*").alias("num_readings")
        )
        .select(
            "pool_id",
            F.col("w.start").alias("window_start"),
            F.col("w.end").alias("window_end"),
            "ph_avg","chlorine_avg","temp_avg","turbidity_max","water_level_avg",
            "pump_kwh_sum","num_readings",
            # flags
            ( (F.col("ph_avg") < PH_MIN) | (F.col("ph_avg") > PH_MAX) ).alias("ph_out_of_range"),
            ( (F.col("chlorine_avg") < CL_MIN) | (F.col("chlorine_avg") > CL_MAX) ).alias("chlorine_out_of_range"),
            F.current_timestamp().alias("calc_ts"),
            F.to_date(F.current_timestamp()).alias("calc_date"),
        )
)

# crear si no existe
gold_agg_schema = StructType([
    StructField("pool_id", IntegerType(), True),
    StructField("window_start", TimestampType(), True),
    StructField("window_end", TimestampType(), True),
    StructField("ph_avg", DoubleType(), True),
    StructField("chlorine_avg", DoubleType(), True),
    StructField("temp_avg", DoubleType(), True),
    StructField("turbidity_max", DoubleType(), True),
    StructField("water_level_avg", DoubleType(), True),
    StructField("pump_kwh_sum", DoubleType(), True),
    StructField("num_readings", LongType(), True),
    StructField("ph_out_of_range", BooleanType(), True),
    StructField("chlorine_out_of_range", BooleanType(), True),
    StructField("calc_ts", TimestampType(), True),
    StructField("calc_date", DateType(), True),
])

ensure_delta(GOLD_AGG, gold_agg_schema, partition_cols=["calc_date"])

q_gold_agg = (
    gold_agg.writeStream
        .format("delta")
        .outputMode("append")
        .option("path", GOLD_AGG)
        .option("checkpointLocation", GOLD_AGG_CHK)
        .partitionBy("calc_date")
        .trigger(processingTime="10 seconds")
        .queryName("gold_smartpool_sensors_1m")
        .start()
)

print("GOLD agg streaming ->", GOLD_AGG)


                                                                                

Creada Delta vacía: s3a://spark/medallion/gold/smartpool_sensors_1m


[Stage 3:=>                (4 + 4) / 50][Stage 5:>                 (0 + 0) / 50]

GOLD agg streaming -> s3a://spark/medallion/gold/smartpool_sensors_1m


[Stage 3:=====>           (16 + 4) / 50][Stage 5:>                 (0 + 0) / 50]

In [14]:
# Batch dims (estáticos para el streaming)
pools_dim = (spark.read.format("delta").load(SILVER_POOLS)
    .select("pool_id", "pool_name", "location", "owner_type", "is_heated", "volume_liters")
    .dropDuplicates(["pool_id"])
)

elec = (spark.read.format("delta").load(SILVER_ELEC)
    .select(
        F.col("date").alias("elec_date"),
        F.col("hour").alias("elec_hour"),
        "region",
        "price_eur_kwh",
        "price_eur_mwh"
    )
)

# Enriquecemos lecturas (no la agregación), para tener coste puntual
silver_stream2 = spark.readStream.format("delta").load(SILVER_SENS)

enriched = (silver_stream2
    .withColumn("event_date", F.to_date("sensor_ts"))
    .withColumn("event_hour", F.hour("sensor_ts"))
    .join(pools_dim, on="pool_id", how="left")
    .join(
        elec,
        (F.col("event_date") == F.col("elec_date")) & (F.col("event_hour") == F.col("elec_hour")),
        how="left"
    )
    .withColumn("pump_kwh_est", F.coalesce(F.col("pump_kwh_est"), F.lit(0.0)))
    .withColumn("pump_cost_eur_est", F.col("pump_kwh_est") * F.col("price_eur_kwh"))
    .withColumn("calc_ts", F.current_timestamp())
    .withColumn("calc_date", F.to_date(F.col("calc_ts")))
    .drop("elec_date", "elec_hour")
)

# Crea tabla GOLD_ENR si no existe
enr_schema = enriched.schema  # ok para crear vacío
if not delta_exists(GOLD_ENR):
    (spark.createDataFrame([], enr_schema).write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .partitionBy("calc_date")
        .save(GOLD_ENR)
    )
    print("Creada GOLD enriched:", GOLD_ENR)

q_gold_enr = (
    enriched.writeStream
        .format("delta")
        .outputMode("append")
        .option("path", GOLD_ENR)
        .option("checkpointLocation", GOLD_ENR_CHK)
        .partitionBy("calc_date")
        .trigger(processingTime="10 seconds")
        .queryName("gold_smartpool_sensors_enriched")
        .start()
)

print("GOLD enriched streaming ->", GOLD_ENR)


IllegalArgumentException: Cannot start query with name gold_smartpool_sensors_enriched as a query with that name is already active in this SparkSession

In [24]:
import time
from pyspark.sql import functions as F

print("Streams activos:", len(spark.streams.active))
for q in spark.streams.active:
    print("-", q.name, q.id, "| isActive:", q.isActive)

def print_progress(q):
    lp = q.lastProgress
    if not lp:
        print(f"[{q.name}] sin lastProgress todavía")
        return
    evt = lp.get("eventTime", {})
    print(
        f"[{q.name}] batchId={lp.get('batchId')} "
        f"inputRows={lp.get('numInputRows')} "
        f"procRPS={lp.get('processedRowsPerSecond'):.2f} "
        f"watermark={evt.get('watermark')}"
    )

print("\n== Progreso (lastProgress) ==")
for q in spark.streams.active:
    print_progress(q)

print("\n== Preview GOLD_1m (batch read) ==")
try:
    (spark.read.format("delta")
        .load(GOLD_AGG)
        .orderBy(F.col("window_start").desc())
        .show(50, truncate=False))
except Exception as e:
    print("No se pudo leer GOLD_1m:", e)

print("\n== Preview SILVER (batch read) ==")
try:
    (spark.read.format("delta")
        .load(SILVER_SENS)
        .orderBy(F.col("sensor_ts").desc())
        .show(20, truncate=False))
except Exception as e:
    print("No se pudo leer SILVER:", e)


Streams activos: 4
- gold_smartpool_sensors_1m 96e72db3-decb-4372-90d2-3d670f40b46f | isActive: True
- gold_smartpool_sensors_enriched c9a5e626-6623-4ce9-8cff-90ee90e6a1d5 | isActive: True
- silver_smartpool_sensors 39af24f3-8edf-48d5-b46a-1575ed45d100 | isActive: True
- bronze_smartpool_sensors_kafka 4f82ae83-5f51-4a54-9fc8-b8dabdcf4bd5 | isActive: True

== Progreso (lastProgress) ==
[gold_smartpool_sensors_1m] batchId=52 inputRows=0 procRPS=0.00 watermark=2026-01-26T22:54:09.810Z
[gold_smartpool_sensors_enriched] batchId=49 inputRows=20 procRPS=1.93 watermark=None
[silver_smartpool_sensors] batchId=51 inputRows=10 procRPS=1.49 watermark=None
[bronze_smartpool_sensors_kafka] batchId=52 inputRows=10 procRPS=2.57 watermark=None

== Preview GOLD_1m (batch read) ==
+-------+-------------------+-------------------+------------------+------------------+------------------+-------------+-----------------+------------------+------------+---------------+---------------------+-------------------

26/01/26 22:55:06 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 10000} milliseconds, but spent 16638 milliseconds
[Stage 2170:>               (0 + 4) / 6][Stage 2174:>              (0 + 0) / 50]

No se pudo leer SILVER: [Errno 111] Connection refused


----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 43426)
Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/opt/conda/lib/python3.11/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/opt/conda/lib/python3.11/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/opt/conda/lib/python3.11/socketserver.py", line 755, in __init__
    self.handle()
  File "/opt/conda/lib/python3.11/site-packages/pyspark/accumulators.py", line 299, in handle
    poll(accum_updates)
  File "/opt/conda/lib/python3.11/site-packages/pyspark/accumulators.py", line 271, in poll
    if self.rfile in r and func():
                           ^^^^^^
  File "/opt/conda/lib/python3.11/site-packages/pyspark/accumulators.p

In [10]:
# print("Streams activos:", len(spark.streams.active))
# for q in spark.streams.active:
#     print("-", q.name, q.id)

# # Preview por consola (opcional). Si te consume, comenta esta parte.
# console_q = (
#     gold_agg.writeStream
#         .format("console")
#         .outputMode("append")
#         .option("truncate", "false")
#         .option("checkpointLocation", f"{CHK_BASE}/console_smartpool_sensors_preview")
#         .trigger(processingTime="10 seconds")
#         .queryName("console_smartpool_sensors_preview")
#         .start()
# )

# spark.streams.awaitAnyTermination()
