In [15]:
import findspark
findspark.init()

In [16]:
import os

os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.2" pyspark-shell'

In [17]:
from pyspark.sql.session import SparkSession

spark = SparkSession.builder\
                    .appName("BTC_Price")\
                    .getOrCreate()


In [18]:
btc_df = spark.readStream \
    .format("kafka") \
    .option("kafka.bootstrap.servers", "localhost:9092") \
    .option("subscribe", "btc_price") \
    .option("startingOffsets", "latest")\
    .option("kafka.group.id", "spark-btc-group")\
    .load()

25/02/14 15:29:52 WARN KafkaSourceProvider: Kafka option 'kafka.group.id' has been set on this query, it is
 not recommended to set this option. This option is unsafe to use since multiple concurrent
 queries or sources using the same group id will interfere with each other as they are part
 of the same consumer group. Restarted queries may also suffer interference from the
 previous run having the same group id. The user should have only one query per group id,
 and/or set the option 'kafka.session.timeout.ms' to be very small so that the Kafka
 consumers from the previous query are marked dead by the Kafka group coordinator before the
 restarted query starts running.
    


In [19]:
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import StructType, StringType

btc_schema = StructType() \
    .add("ExchangeRate", StringType()) \
    .add("BidPrice", StringType()) \
    .add("AskPrice", StringType()) \
    .add("LastRefreshed", StringType())

# Step 4: Deserialize Kafka Messages (Assuming JSON Format)
parsed_btc_df = btc_df.selectExpr("CAST(value AS STRING)") \
    .select(from_json(col("value"), btc_schema).alias("data")) \
    .select("data.*")  # Unpack JSON structure

# Step 5: Write Streaming Data to Console (For Debugging)
query = parsed_btc_df.writeStream \
    .outputMode("append") \
    .format("console") \
    .option("truncate", "false") \
    .start()

25/02/14 15:29:53 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-76ae61fd-f78b-4256-bb2b-3228c7337240. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
25/02/14 15:29:53 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
25/02/14 15:29:53 WARN KafkaSourceProvider: Kafka option 'kafka.group.id' has been set on this query, it is
 not recommended to set this option. This option is unsafe to use since multiple concurrent
 queries or sources using the same group id will interfere with each other as they are part
 of the same consumer group. Restarted queries may also suffer interference from the
 previous run having the same group id. The user should have only one query per group id,
 and

-------------------------------------------
Batch: 0
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
+------------+--------+--------+-------------+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
-------------------------------------------
Batch: 9
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

-------------------------------------------
Batch: 2
-------------------------------------------
-------------------------------------------
Batch: 10
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------

                                                                                

-------------------------------------------
Batch: 11
-------------------------------------------
-------------------------------------------
Batch: 3
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

-------------------------------------------
Batch: 12
-------------------------------------------
-------------------------------------------
Batch: 4
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+------------

                                                                                

-------------------------------------------
Batch: 14
-------------------------------------------
-------------------------------------------
Batch: 6
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

-------------------------------------------
Batch: 15
-------------------------------------------
-------------------------------------------
Batch: 7
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+------------

                                                                                

-------------------------------------------
Batch: 31
-------------------------------------------
-------------------------------------------
Batch: 23
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

-------------------------------------------
Batch: 24
-------------------------------------------
-------------------------------------------
Batch: 32
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+----------

                                                                                

-------------------------------------------
Batch: 33
-------------------------------------------
-------------------------------------------
Batch: 41
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+-------------+
|NULL        |NULL    |NULL    |NULL         |
+------------+--------+--------+-------------+

-------------------------------------------
Batch: 34
-------------------------------------------
-------------------------------------------
Batch: 42
-------------------------------------------
+------------+--------+--------+-------------+
|ExchangeRate|BidPrice|AskPrice|LastRefreshed|
+------------+--------+--------+----------