## Baseline stateless receiver

In [None]:
import os

# Launch arguments for the PySpark shell: pull the Kafka connector artifacts
# via --packages. This has to be set before the first SparkSession is created,
# because the arguments are only read when the JVM gateway is launched.
os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join(
    [
        "--packages",
        ",".join(
            [
                "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.6",
                "org.apache.spark:spark-streaming-kafka-0-10_2.12:3.5.6",
            ]
        ),
        "pyspark-shell",
    ]
)

In [None]:
from pyspark.sql import SparkSession

# Session used by every later cell; getOrCreate() returns the already-active
# session if one exists instead of building a second one.
spark = (
    SparkSession.builder
    .appName("read_test_straeam")
    .getOrCreate()
)

In [None]:
from pyspark.sql.functions import from_json, col
from pyspark.sql.types import StructType, StructField, StringType

# Kafka connection settings for the stream read below.
KAFKA_BOOTSTRAP_SERVERS = "127.0.0.1:9092"
KAFKA_TOPIC = "sensors"

# Layout applied to each message's JSON payload: a top-level "sensor" field
# plus a nested "info" struct. Every leaf is read as a string.
schema = StructType([
    StructField("sensor", StringType()),
    StructField("info", StructType([
        StructField("timestamp", StringType()),
        StructField("obs", StringType()),
        StructField("drift", StringType()),
    ])),
])

# Streaming DataFrame over the topic, starting from the earliest offsets.
# The Kafka `value` column is cast to a string, parsed with `schema`, and then
# flattened into the columns sensor / timestamp / obs / drift.
df = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", KAFKA_BOOTSTRAP_SERVERS)
    .option("subscribe", KAFKA_TOPIC)
    .option("startingOffsets", "earliest")
    .load()
    .select(
        from_json(col("value").cast("string"), schema).alias("parsed_value")
    )
    .select("parsed_value.sensor", "parsed_value.info.*")
)

In [None]:
# Print each micro-batch of `df` to the console sink in append mode. Progress
# is tracked in the checkpoint directory given below.
query = (
    df.writeStream
    .format("console")
    .outputMode('Append')
    .option("checkpointLocation", "/tmp/spark_kafka_checkpoint")
    .start()
)

try:
    # Blocks this cell until the query terminates or the kernel is interrupted.
    query.awaitTermination()
finally:
    # Interrupting the kernel only unblocks the Python side; without an explicit
    # stop() the query would keep running in the JVM, and a re-run of this cell
    # would collide with it on the same checkpoint directory.
    query.stop()

-------------------------------------------
Batch: 831
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-2|2025-06-12 10:43:11|3.9673789650009827|false|
+--------+-------------------+------------------+-----+

-------------------------------------------
Batch: 832
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-3|2025-06-12 10:43:12|1.0680604713178226|false|
+--------+-------------------+------------------+-----+

-------------------------------------------
Batch: 833
-------------------------------------------
+--------+-------------------+----------------+-----+
|  sensor|          timestamp|             obs|drift|
+--------+-------------------+---

                                                                                

-------------------------------------------
Batch: 843
-------------------------------------------
+--------+-------------------+-----------------+-----+
|  sensor|          timestamp|              obs|drift|
+--------+-------------------+-----------------+-----+
|sensor-2|2025-06-12 10:43:23|3.581181892781389|false|
+--------+-------------------+-----------------+-----+

-------------------------------------------
Batch: 844
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-3|2025-06-12 10:43:24|0.7454908918120746|false|
+--------+-------------------+------------------+-----+

-------------------------------------------
Batch: 845
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+----

                                                                                

-------------------------------------------
Batch: 846
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-2|2025-06-12 10:43:26|3.2670254863024986|false|
+--------+-------------------+------------------+-----+



                                                                                

-------------------------------------------
Batch: 847
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-3|2025-06-12 10:43:27|1.2506641271969614|false|
+--------+-------------------+------------------+-----+



                                                                                

-------------------------------------------
Batch: 848
-------------------------------------------
+--------+-------------------+-----------------+-----+
|  sensor|          timestamp|              obs|drift|
+--------+-------------------+-----------------+-----+
|sensor-1|2025-06-12 10:43:28|2.373860696690056|false|
+--------+-------------------+-----------------+-----+



                                                                                

-------------------------------------------
Batch: 849
-------------------------------------------
+--------+-------------------+-----------------+-----+
|  sensor|          timestamp|              obs|drift|
+--------+-------------------+-----------------+-----+
|sensor-2|2025-06-12 10:43:29|3.601455504217728|false|
+--------+-------------------+-----------------+-----+

-------------------------------------------
Batch: 850
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-3|2025-06-12 10:43:30|1.1480943817498044|false|
+--------+-------------------+------------------+-----+

-------------------------------------------
Batch: 851
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+----

                                                                                

-------------------------------------------
Batch: 876
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-2|2025-06-12 10:43:56|3.4628056378358982|false|
+--------+-------------------+------------------+-----+



                                                                                

-------------------------------------------
Batch: 877
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-3|2025-06-12 10:43:57|1.3918668864060586|false|
+--------+-------------------+------------------+-----+

-------------------------------------------
Batch: 878
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------+------------------+-----+
|sensor-1|2025-06-12 10:43:58|1.8535258111084767|false|
+--------+-------------------+------------------+-----+

-------------------------------------------
Batch: 879
-------------------------------------------
+--------+-------------------+------------------+-----+
|  sensor|          timestamp|               obs|drift|
+--------+-------------------