# PySpark: Streaming Tweets (modern Structured Streaming version)

Reference: https://medium.com/@ch.nabarun/easy-to-play-with-twitter-data-using-spark-structured-streaming-76fe86f1f81c

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.functions import col, split

# Obtain the Spark session for this notebook, creating it under the
# application name "TwitterDataFrame" if one does not exist yet.
spark = (
    SparkSession.builder
    .appName("TwitterDataFrame")
    .getOrCreate()
)

# Open a streaming DataFrame backed by the TCP socket at localhost:9009.
tweet_df = (
    spark.readStream
    .format("socket")
    .option("host", "localhost")
    .option("port", 9009)
    .load()
)

# Explicitly cast the incoming `value` column to STRING so the text
# functions applied downstream operate on a string column.
tweet_df_string = tweet_df.selectExpr("CAST(value AS STRING)")


# Split every incoming line into words on single spaces, keep only the words
# that contain '#', then count occurrences per word and order the result by
# descending count.
#
# The filter is applied BEFORE the aggregation: the resulting table is the same
# as filtering after the count, but the grouping only ever sees hashtag words,
# so the query does not depend on the optimizer to avoid aggregating (and
# sorting) every distinct word in the stream.
tweets_tab = (
    tweet_df_string
    .withColumn('word', explode(split(col('value'), ' ')))
    .filter(col('word').contains('#'))
    .groupBy('word')
    .count()
    .sort('count', ascending=False)
)

# Start the streaming query and expose its result as the in-memory table
# "tweetquery". Output mode "complete" emits the entire aggregated result on
# every trigger; the trigger is set to a processing-time interval of 2 seconds.
writeTweet = (
    tweets_tab.writeStream
    .outputMode("complete")
    .format("memory")
    .queryName("tweetquery")
    .trigger(processingTime='2 seconds')
    .start()
)

print("----- streaming is running -------")

----- streaming is running -------


In [5]:
# Show the ten most frequent hashtags currently held in the in-memory table.
# The ORDER BY is explicit because LIMIT without an ordering does not guarantee
# which ten rows are returned.
spark.sql("SELECT * FROM tweetquery ORDER BY `count` DESC LIMIT 10").show()

+----+-----+
|word|count|
+----+-----+
+----+-----+



In [None]:
# Stop every streaming query that is still active before tearing Spark down,
# so no trigger fires against a context that has already been stopped.
for active_query in spark.streams.active:
    active_query.stop()

# Stopping the session also stops its underlying SparkContext.
spark.stop()

```
----------- 2020-11-25 21:40:18 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|            #COVID19|            4|
|         #earthquake|            3|
|        #FlynnPardon|            3|
|         #LifeGoesOn|            2|
|   #Thanksgiving2020|            2|
|              #Labor|            2|
|       #WITHAPURPOSE|            2|
|                   #|            2|
|#OhTheWeatherOuts...|            2|
|             #pdx911|            2|
+--------------------+-------------+
```