In [1]:
from pyspark import SparkConf,SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row,SQLContext
import sys
import requests
# create spark configuration
conf = SparkConf()
conf.setMaster('local[2]')
conf.setAppName("TwitterStreamApp")
# create spark context with the above configuration
sc = SparkContext(conf=conf)
sc.setLogLevel("ERROR")
# create the Streaming Context from the above spark context with interval size 2 seconds
ssc = StreamingContext(sc, 60)
# setting a checkpoint to allow RDD recovery
ssc.checkpoint("checkpoint_TwitterApp")


In [None]:
def aggregate_tags_count(new_values, total_sum):
    return sum(new_values) + (total_sum or 0)

def get_sql_context_instance(spark_context):
    if ('sqlContextSingletonInstance' not in globals()):
        globals()['sqlContextSingletonInstance'] = SQLContext(spark_context)
    return globals()['sqlContextSingletonInstance']

def process_rdd(time, rdd):
    print("----------- %s -----------" % str(time))
    try:
        # Get spark sql singleton context from the current context
        sql_context = get_sql_context_instance(rdd.context)
        # convert the RDD to Row RDD
        row_rdd = rdd.map(lambda w: Row(hashtag=w[0], hashtag_count=w[1]))
        # create a DF from the Row RDD
        hashtags_df = sql_context.createDataFrame(row_rdd)
        # Register the dataframe as table
        hashtags_df.registerTempTable("hashtags")
        # get the top 10 hashtags from the table using SQL and print them
        hashtag_counts_df = sql_context.sql("select hashtag, hashtag_count from hashtags order by hashtag_count desc limit 20")
#             finally:
        hashtag_counts_df.show()
    except:
        e = sys.exc_info()[0]
        print(e)
    finally:
        try:
            hashtag_counts_df.show()
        except:
            pass
        

In [None]:
# read data from port 9009
dataStream = ssc.socketTextStream("localhost",9009)
dataStream.pprint()
# split each tweet into words
words = dataStream.flatMap(lambda line: line.split(" "))
# filter the words to get only hashtags, then map each hashtag to be a pair of (hashtag,1)
hashtags = words.filter(lambda w: '#' in w).map(lambda x: (x, 1))
# adding the count of each hashtag to its last count
tags_totals = hashtags.updateStateByKey(aggregate_tags_count)
# do processing for each RDD generated in each interval
tags_totals.foreachRDD(process_rdd)
# start the streaming computation
ssc.start()
# wait for the streaming to finish
ssc.awaitTermination()

-------------------------------------------
Time: 2018-11-01 23:53:00
-------------------------------------------
I just need 5 seconds in @KoldKilla_ and @1Bandooo  closet. They outfits 😍😍😍😍.
We here ‼️‼️‼️
As could be predicted, Trump made NO POLICY ANNOUNCEMENT.
We are so grateful to work with a diverse group of incredible #badass companies! Our clients are in manufacturing,… https://t.co/zKpGHw72Iy
I gotta start bein selfish...GOT TO!!😤 cus everybody else sho is 😒
Say she luv me jus to put my chain on ,

----------- 2018-11-01 23:53:00 -----------
+-------+-------------+
|hashtag|hashtag_count|
+-------+-------------+
|#badass|            1|
+-------+-------------+

+-------+-------------+
|hashtag|hashtag_count|
+-------+-------------+
|#badass|            1|
+-------+-------------+

-------------------------------------------
Time: 2018-11-01 23:54:00
-------------------------------------------
Just posted a photo @ Howland Township, Trumbull County, Ohio https://t.co/kHUaX4hbBN


+--------------+-------------+
|       hashtag|hashtag_count|
+--------------+-------------+
|     #BeAVoter|           25|
|          #job|           12|
|       #Hiring|           11|
|      #traffic|            8|
|       #Resist|            7|
|    #CareerArc|            6|
|#TrueToAtlanta|            5|
|         #love|            5|
|       #Repost|            5|
|#StatueOfUnity|            5|
+--------------+-------------+

-------------------------------------------
Time: 2018-11-01 23:59:00
-------------------------------------------
Give me ass or give me death. Ok actually give me both.
Juancho Hernangomez is a really nice young piece for the Nuggets who doesn’t get talked about enough due to injury last season
Check out my brothers new banger !!! @DarrinBakerSAB x Dearly Departed (Prod. Berkli) by DarrinBakerM #np on… https://t.co/vFpilyVxHh
Y’all Be Dating Some Bum Ass Bitches Fr
I hate tests. With a passion.
@I_am_Adrien It’s called lipliner
presented without comment. htt

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|           59|
|      #Resist|           43|
|      #Hiring|           19|
|         #job|           17|
|#Supernatural|           16|
|   #CareerArc|           12|
|           #1|           10|
|#RaiderNation|           10|
|   #halloween|           10|
|      #LivePD|           10|
+-------------+-------------+

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|           59|
|      #Resist|           43|
|      #Hiring|           19|
|         #job|           17|
|#Supernatural|           16|
|   #CareerArc|           12|
|           #1|           10|
|#RaiderNation|           10|
|   #halloween|           10|
|      #LivePD|           10|
+-------------+-------------+

-------------------------------------------
Time: 2018-11-02 00:04:00
-------------------------------------------
This is such an awesome picture!
@VTS_Semina

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|           85|
|      #Resist|           43|
|#Supernatural|           37|
|         #job|           28|
|      #Hiring|           26|
|      #LivePD|           17|
|   #CareerArc|           16|
|     #traffic|           15|
|           #1|           15|
|#RaiderNation|           14|
+-------------+-------------+

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|           85|
|      #Resist|           43|
|#Supernatural|           37|
|         #job|           28|
|      #Hiring|           26|
|      #LivePD|           17|
|   #CareerArc|           16|
|     #traffic|           15|
|           #1|           15|
|#RaiderNation|           14|
+-------------+-------------+

-------------------------------------------
Time: 2018-11-02 00:09:00
-------------------------------------------
@kendrick38 talks the importance of #voting 

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|          112|
|#Supernatural|           66|
|         #job|           44|
|      #Resist|           43|
|      #Hiring|           37|
|      #LivePD|           25|
|   #CareerArc|           24|
|   #Halloween|           18|
|     #traffic|           18|
|         #tbt|           18|
+-------------+-------------+

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|          112|
|#Supernatural|           66|
|         #job|           44|
|      #Resist|           43|
|      #Hiring|           37|
|      #LivePD|           25|
|   #CareerArc|           24|
|   #Halloween|           18|
|     #traffic|           18|
|         #tbt|           18|
+-------------+-------------+

-------------------------------------------
Time: 2018-11-02 00:14:00
-------------------------------------------
Y’all can keep that weirdo rap .. I have my 

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|          133|
|         #job|          103|
|   #CareerArc|           91|
|      #Hiring|           87|
|#Supernatural|           80|
|      #Resist|           61|
|     #hiring!|           42|
|      #LivePD|           37|
|         #Job|           31|
|        #Jobs|           31|
+-------------+-------------+

+-------------+-------------+
|      hashtag|hashtag_count|
+-------------+-------------+
|    #BeAVoter|          133|
|         #job|          103|
|   #CareerArc|           91|
|      #Hiring|           87|
|#Supernatural|           80|
|      #Resist|           61|
|     #hiring!|           42|
|      #LivePD|           37|
|         #Job|           31|
|        #Jobs|           31|
+-------------+-------------+

-------------------------------------------
Time: 2018-11-02 00:19:00
-------------------------------------------
‘Oscar’s Exit’ Did Judd Nelson star in that?