In [1]:
from pyspark import SparkConf,SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row,SQLContext
import sys
import requests
# create spark configuration
conf = SparkConf()
# use two kernels to speed up
conf.setMaster('local[2]')
conf.setAppName("TwitterStreamApp")
# create spark context with the above configuration
sc = SparkContext(conf=conf)
sc.setLogLevel("ERROR")
# create the Streaming Context from the above spark context with interval size 2 seconds
ssc = StreamingContext(sc, 30)
# setting a checkpoint to allow RDD recovery
ssc.checkpoint("checkpoint_TwitterApp")

In [2]:
def aggregate_tags_count(new_values, total_sum):
    return sum(new_values) + (total_sum or 0)

def get_sql_context_instance(spark_context):
    if ('sqlContextSingletonInstance' not in globals()):
        globals()['sqlContextSingletonInstance'] = SQLContext(spark_context)
    return globals()['sqlContextSingletonInstance']

def process_rdd(time, rdd):
    print("----------- %s -----------" % str(time))
    try:
#         print("start")
        # Get spark sql singleton context from the current context
        sql_context = get_sql_context_instance(rdd.context)
        print("------------sql_context----------------")
        sql_context.pprint()
        # convert the RDD to Row RDD
        row_rdd = rdd.map(lambda w: Row(hashtag=w[0], hashtag_count=w[1]))
        print("---------------row_rdd------------------")
        row_rdd.show()
        # create a DF from the Row RDD
        hashtags_df = sql_context.createDataFrame(row_rdd)
        print("----------------hashtags_df-------------")
        hashtags_df.show()
        # Register the dataframe as table
        hashtags_df.registerTempTable("hashtags")
        print("------------hashtages----------------")
        print(hashtages.show())
        # get the top 10 hashtags from the table using SQL and print them
        hashtag_counts_df = sql_context.sql("select hashtag, hashtag_count from hashtags order by hashtag_count desc limit 10")
        print("---------------hashtag_count_df--------------")
        hashtag_counts_df.show()
    except:
        print("------------------error------------------")
        e = sys.exc_info()[0]
        print(e)
#         finally:
#             try:
#                 hashtag_counts_df.show()
#             except:
#                 pass

def send_df_to_dashboard(df):
    # extract the hashtags from dataframe and convert them into array
    top_tags = [str(t.hashtag) for t in df.select("hashtag").collect()]
    # extract the counts from dataframe and convert them into array
    tags_count = [p.hashtag_count for p in df.select("hashtag_count").collect()]
    # initialize and send the data through REST API
    url = 'http://localhost:5001/updateData'
    request_data = {'label': str(top_tags), 'data': str(tags_count)}
    response = requests.post(url, data=request_data)

In [3]:
# read data from port 9009
dataStream = ssc.socketTextStream("localhost",10005)

In [4]:
dataStream.pprint()

In [5]:
# split each tweet into words
words = dataStream.flatMap(lambda line: line.split(" "))
words.pprint()
# filter the words to get only hashtags, then map each hashtag to be a pair of (hashtag,1)
hashtags = words.filter(lambda w: '#' in w).map(lambda x: (x, 1))
# adding the count of each hashtag to its last count
tags_totals = hashtags.updateStateByKey(aggregate_tags_count)
# do processing for each RDD generated in each interval
tags_totals.foreachRDD(process_rdd)
# start the streaming computation
ssc.start()
# wait for the streaming to finish
ssc.awaitTermination()

-------------------------------------------
Time: 2018-10-20 03:48:30
-------------------------------------------
What A Beautiful World We Live In https://t.co/1G9B0h92rl https://t.co/MhIkuuKOao
God I hate the Jazz
Bring back @LukeCage @netflix
@JosieKuhn1 if this ain’t my mood idk what is
Hungry af Chinese food don't do shit lol
This is ignorant. And I’m saying something because you’re wrong AND I don’t respect you.
@patbennettaz @lta100163 @GovHowardDean according to independent liberals over 80% of usa benefits from tax cuts n… https://t.co/8LWIUlNO1u
I have twitter and this use to me my vent space and just let it all out. Nobody from my regular life follows my twi… https://t.co/kpBUMDAmua
From Paris with love - #morningmotivation #disciplineequalsfreedom #wakeup #saturday #morning @ Paris, France https://t.co/26Di8Auuqw
@HoodieAllen fuck yeah they do
...

-------------------------------------------
Time: 2018-10-20 03:48:30
-------------------------------------------
What
A
Beauti

-------------------------------------------
Time: 2018-10-20 03:52:30
-------------------------------------------
#BadhaaiHo is wildly entertaining. Either it’s universally hilarious or I’m immersed enough in Hindi cinema to appr… https://t.co/RHFgnqZbWs
Looks like Colorado State’s starting QB was their problem. WTH? C’mon, Broncos, gotta tighten up on defense. #bleedblue
Yup. It's totally acceptable for adult friends to pay real money to be scared in the woods. #HauntedHouse https://t.co/PPIpC94Plb
CHRIS LAKE TONIGHT 🤩🕺🏻
Jazz on pace for 162? Sheesh.
@FrozenFacade I also can’t do 5AAA &gt; sj.A or 5B &gt; sj.B confirms.
@talkhoops @LILBTHEBASEDGOD I spoke too soon. Just checked the Dodger score.
Spent the afternoon with @ClareDunnMusic and a few hundred of her closest friends. We had a blast! Thanks for havin… https://t.co/hmUPNPPdl6
@SuperWeenieHtJr I had #23.
lmaoo i hated that shit
...

-------------------------------------------
Time: 2018-10-20 03:52:30
--------------------------

-------------------------------------------
Time: 2018-10-20 03:56:30
-------------------------------------------
Ain’t shit wrong with showing off yo girl every once in a while.
Ion even know why i left my house 🙄
@work4brajesh Ishq wala love sune the 😉
@MattMcKenzie27 Can we both just travel to any &amp; every festival, not working a day again?! Cuz that would be the ideal outcome haha
@EdKrassen Twump is more than complicit, he is involved in the death of Khashoggi !!
Congratulations Smryna on your victory against Antioch 45-0 and big shout out to @313marr  @Alex_3m the big fella f… https://t.co/cxnO0iitzt
These days I take actions very seriously, including my own. So if I’m pouring myself into you it’s intentional and… https://t.co/4ho3ROhKAs
I still think the louis "the" hats are the greatest things to happen ever.
I just moaned https://t.co/uCQ92qISc9
if anyone wants to know how i'm doing a huge chunk of my hair fell out while i was in the middle of washing it lol :)
...

-------

KeyboardInterrupt: 

-------------------------------------------
Time: 2018-10-20 03:58:30
-------------------------------------------
In a pool of caramel? Fml ... https://t.co/qR56BMqMQE
I’m as faithful as it gets😂☺️
I am so sorry for Webster, his family and all those whose lives Webster touched. #TheraPets
Barely on social media on Friday’s
Friday’s is all about design &amp; fashion🤗but here some ass to remind y’all I’m sti… https://t.co/acE6p8oXei
@justicar @seattletimes Editorial board, I think you mean. The editorial staff is the reporters, photographers and… https://t.co/OgvjvbYjB3
@MishLGee_xoxo @GoensStephanie Don’t even know who that person is 😂😂😂
@gc__93 Lmao dang man why you always getting into something
She loves me guys
Lmaoooo I be having double the road rage than the driver 😂
...

-------------------------------------------
Time: 2018-10-20 03:58:30
-------------------------------------------
In
a
pool
of
caramel?
Fml
...
https://t.co/qR56BMqMQE
I’m
as
...

----------- 2018-10-20 03:58:30 -

-------------------------------------------
Time: 2018-10-20 04:09:00
-------------------------------------------

-------------------------------------------
Time: 2018-10-20 04:09:00
-------------------------------------------

----------- 2018-10-20 04:09:00 -----------
------------sql_context----------------
<class 'AttributeError'>
-------------------------------------------
Time: 2018-10-20 04:09:30
-------------------------------------------

-------------------------------------------
Time: 2018-10-20 04:09:30
-------------------------------------------

----------- 2018-10-20 04:09:30 -----------
------------sql_context----------------
<class 'AttributeError'>
-------------------------------------------
Time: 2018-10-20 04:10:00
-------------------------------------------

-------------------------------------------
Time: 2018-10-20 04:10:00
-------------------------------------------

----------- 2018-10-20 04:10:00 -----------
------------sql_context----------------
<class '

-------------------------------------------
Time: 2018-10-20 04:21:30
-------------------------------------------

-------------------------------------------
Time: 2018-10-20 04:21:30
-------------------------------------------

----------- 2018-10-20 04:21:30 -----------
------------sql_context----------------
<class 'AttributeError'>
-------------------------------------------
Time: 2018-10-20 04:22:00
-------------------------------------------

-------------------------------------------
Time: 2018-10-20 04:22:00
-------------------------------------------

----------- 2018-10-20 04:22:00 -----------
------------sql_context----------------
<class 'AttributeError'>
-------------------------------------------
Time: 2018-10-20 04:22:30
-------------------------------------------

-------------------------------------------
Time: 2018-10-20 04:22:30
-------------------------------------------

----------- 2018-10-20 04:22:30 -----------
------------sql_context----------------
<class '