In [10]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, BooleanType, TimestampType

# Build (or reuse) the Spark session for this notebook.
# The Kafka source is not bundled with Spark, so its connector is requested
# through `spark.jars.packages`.
# NOTE(review): the connector coordinate pins Scala 2.12 / Spark 3.4.1 --
# presumably this has to match the installed Spark build; confirm.
spark = (
    SparkSession.builder
    .appName("KafkaBTCConsumer")
    .master("local[*]")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.4.1",
    )
    .getOrCreate()
)

In [11]:

# Kafka connection settings used by the streaming reader below.
# Broker address list; point this at the remote host(s) when Kafka is not local.
kafka_bootstrap_servers = "localhost:9092"
# Topic the stream subscribes to.
kafka_topic = "btc_price"

In [12]:


# Schema applied to the JSON payload of each Kafka message.
# Every field is nullable.
# - "date" is read as a plain string here and cast to a timestamp after parsing.
# - "id" is read as a string rather than a numeric type.
#   NOTE(review): the captured console output shows ids such as
#   1.12083374073064112E17, so the producer appears to emit them as floating
#   point numbers; reading them as strings cannot restore digits already lost
#   upstream -- confirm against the producer.
schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("date", StringType()),
        ("favorites", DoubleType()),
        ("id", StringType()),
        ("isRetweet", BooleanType()),
        ("retweets", DoubleType()),
        ("text", StringType()),
    )
])


In [13]:

# Open a streaming DataFrame over the Kafka topic.
# "startingOffsets" = "latest" means the query starts from the end of the
# topic, so messages published before the query starts are not read.
kafka_df = (
    spark.readStream
    .format("kafka")
    .options(**{
        "kafka.bootstrap.servers": kafka_bootstrap_servers,
        "subscribe": kafka_topic,
        "startingOffsets": "latest",
    })
    .load()
)

In [14]:

# Step 1: cast the Kafka `value` column to a string
# (the payload is assumed to be JSON text).
json_text_df = kafka_df.select(col("value").cast("string").alias("value"))

# Step 2: parse the JSON against `schema` and flatten the resulting struct so
# each schema field becomes a top-level column.
flattened_df = (
    json_text_df
    .select(from_json(col("value"), schema).alias("data"))
    .select("data.*")
)

# Step 3: replace the string `date` column with a proper timestamp.
parsed_df = flattened_df.withColumn("date", col("date").cast(TimestampType()))

In [15]:
# Print every micro-batch of parsed records to the console sink.
# "append" emits only rows that are new in each batch; truncate=false keeps
# long `text` values from being cut off in the printed table.
query = (
    parsed_df.writeStream
    .outputMode("append")
    .format("console")
    .option("truncate", "false")
    .start()
)

# awaitTermination() blocks this cell until the query ends. The usual way to
# end it in a notebook is to interrupt the kernel, which only unblocks the
# Python side. Without an explicit stop() the query is never told to shut
# down. The finally clause guarantees it is stopped on interrupt or failure;
# the original exception still propagates.
try:
    query.awaitTermination()
finally:
    query.stop()


25/03/12 16:52:01 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-03332b4b-4fd7-4227-a092-78d670dd6434. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
25/03/12 16:52:01 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
25/03/12 16:52:02 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.


-------------------------------------------
Batch: 0
-------------------------------------------
+----+---------+---+---------+--------+----+
|date|favorites|id |isRetweet|retweets|text|
+----+---------+---+---------+--------+----+
+----+---------+---+---------+--------+----+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                                                              |
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 10
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                       |
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------+
|2024-03-12 16:16:02|2853.0   |1.12083374073064112E17|false    |788.0   |"https://www.washingtontimes.com/news/2024/mar/4/loser-nikki-haley-mining-for-humiliation/"|
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------+



                                                                                

-------------------------------------------
Batch: 11
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                        |
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------+
|2020-11-01 06:53:27|0.0      |1.32277876366349517E18|true     |7117.0  |RT @GOP: “A vote for Biden and Harris is a vote to Ban Fracking, Ban Mining, and Completely Destroy Pennsylvania.” -@realDonaldTrump https:…|
+-------------------+---------+-----------

                                                                                

-------------------------------------------
Batch: 13
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                          |
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|2024-07-27 18:28:58|14968.0  |1.12859397412472624E17|false    |3716.0  |"On my way to The Bitcoin Conferen

                                                                                

-------------------------------------------
Batch: 17
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                               |
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------+
|2020-10-03 19:46:48|650131.0 |1.31244903415450419E18|false    |93323.0 |OUR GREAT USA WANTS &amp; NEEDS STIMULUS. WORK TOGETHER AND GET IT DONE. Thank you!|
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------+

-------------------------------------------
Batch: 18
-------------------------------------------
+------------

                                                                                

-------------------------------------------
Batch: 30
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                       |
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------------------------------------------------------+
|2020-02-04 14:20:43|0.0      |1.22468420908998246E18|true     |10570.0 |RT @awzurcher: Here’s your Iowa democracy in action - a coin toss to determine who gets the last delegate out of this #IowaCaucus precinct…|
+-------------------+---------+---------------

                                                                                

-------------------------------------------
Batch: 31
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                          |
+-------------------+---------+----------------------+---------+--------+------------------------------+
|2020-10-13 17:01:57|213136.0 |1.31603142661832704E18|false    |26466.0 |STIMULUS! Go big or go home!!!|
+-------------------+---------+----------------------+---------+--------+------------------------------+

-------------------------------------------
Batch: 32
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                               

                                                                                

-------------------------------------------
Batch: 33
-------------------------------------------
+-------------------+---------+---------------------+---------+--------+----------------------------------------------------------------------------------------------------------+
|date               |favorites|id                   |isRetweet|retweets|text                                                                                                      |
+-------------------+---------+---------------------+---------+--------+----------------------------------------------------------------------------------------------------------+
|2020-10-12 23:14:46|106542.0 |1.3157628626404393E18|false    |21160.0 |Republicans should be strongly focused on completing a wonderful stimulus package for the American People!|
+-------------------+---------+---------------------+---------+--------+----------------------------------------------------------------------------------------------------------+



                                                                                

-------------------------------------------
Batch: 34
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                              |
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------+
|2011-08-22 22:33:46|193.0    |1.05739460074938368E17|false    |299.0   |We don't need another stimulus. The first one was a complete failure. Why repeat the same mistake?|
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------+



                                                                                

-------------------------------------------
Batch: 35
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                                                      |
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 37
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                                                                                                                                 |
+-------------------+---------+-------

                                                                                

-------------------------------------------
Batch: 38
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                                                    |
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 39
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                        |
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 40
-------------------------------------------
+-------------------+---------+---------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                   |isRetweet|retweets|text                                                                                                                                                                                                                                                                                    |
+-------------------+---------+---------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 41
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                                  |
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 42
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                        |
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 45
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                        |
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------+
|2020-05-25 04:20:40|0.0      |1.26474315587595059E18|true     |3519.0  |RT @trtworld: Japan is considering a fresh stimulus package worth over $929 billion that will consist mostly of financial aid programmes fo…|
+-------------------+---------+-----------

                                                                                

-------------------------------------------
Batch: 46
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                       |
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|2024-09-21 23:31:31|7113.0   |1.13177676464453232E17|false    |1909.0  |"“Trump Credits Success of His NFTs for Change of Heart on Bitcoin and Crypto” https://decrypt.co/249755/trump-credits-nfts-change-of-heart

                                                                                

-------------------------------------------
Batch: 47
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                               |
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------+
|2020-10-15 22:41:59|304425.0 |1.31684177546665165E18|false    |62271.0 |Pelosi is holding up STIMULUS, not the Republicans!|
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------+

-------------------------------------------
Batch: 48
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 50
-------------------------------------------
+-------------------+---------+---------------------+---------+--------+-------------------------------------------------------------------------+
|date               |favorites|id                   |isRetweet|retweets|text                                                                     |
+-------------------+---------+---------------------+---------+--------+-------------------------------------------------------------------------+
|2017-02-17 00:44:54|79427.0  |8.3237523027440026E17|false    |15157.0 |'Trump signs bill undoing Obama coal mining rule' https://t.co/yMfT5r5RGh|
+-------------------+---------+---------------------+---------+--------+-------------------------------------------------------------------------+

-------------------------------------------
Batch: 51
-------------------------------------------
+-------------------+---------+----------------------+---------+----

                                                                                

-------------------------------------------
Batch: 88
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                                                    |
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 89
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                     |
+-------------------+---------+----------------------+---------+--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 96
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                               |
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------+
|2020-10-15 22:41:59|304425.0 |1.31684177546665165E18|false    |62271.0 |Pelosi is holding up STIMULUS, not the Republicans!|
+-------------------+---------+----------------------+---------+--------+---------------------------------------------------+

-------------------------------------------
Batch: 97
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 99
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                       |
+-------------------+---------+----------------------+---------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|2024-09-21 23:31:31|7113.0   |1.13177676464453232E17|false    |1909.0  |"“Trump Credits Success of His NFTs for Change of Heart on Bitcoin and Crypto” https://decrypt.co/249755/trump-credits-nfts-change-of-heart

                                                                                

-------------------------------------------
Batch: 100
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                                |
+-------------------+---------+----------------------+---------+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

                                                                                

-------------------------------------------
Batch: 102
-------------------------------------------
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|date               |favorites|id                    |isRetweet|retweets|text                                                                                                                                                                                                                                  |
+-------------------+---------+----------------------+---------+--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/home/osbdet/.jupyter_venv/lib/python3.11/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/osbdet/.jupyter_venv/lib/python3.11/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
                          ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt
ERROR:root:Exception while sending command.
Traceback (most recent call last):
  File "/home/osbdet/.jupyter_venv/lib/python3.11/site-packages/py4j/clientserver.py", line 516, in send_command
    raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty

During handling of 

KeyboardInterrupt: 