In [1]:
from __future__ import print_function

import sys
import os
from IPython.display import display, clear_output

sys.path.insert(0, '/spark/python')
sys.path.insert(0, '/spark/python/lib/py4j-0.10.8.1-src.zip')
os.environ['SPARK_HOME'] = '/spark'

import pyspark
conf = pyspark.SparkConf()
conf.setMaster("spark://spark-master:7077")
conf.set("spark.driver.memory","1g")
conf.set("spark.executor.memory","512m")
conf.set("spark.executor.instances", "3")
conf.set("spark.executor.cores","1")
sc = pyspark.SparkContext(conf=conf)
print(sc)

from pyspark.streaming import StreamingContext
from pyspark.sql import Row,SQLContext,SparkSession

ssc = pyspark.streaming.StreamingContext(sc, 1)
ssc.checkpoint("checkpoint_TwitterApp")
print(ssc)

<SparkContext master=spark://spark-master:7077 appName=pyspark-shell>
<pyspark.streaming.context.StreamingContext object at 0x7fcc146c36a0>


In [2]:
HOST = '17.18.0.2'  # IP address of spark-master
PORT = 9009  

def aggregate_tags_count(new_values, total_sum):
    return sum(new_values) + (total_sum or 0)

def getSparkSessionInstance(sparkConf):
    if ('sparkSessionSingletonInstance' not in globals()):
        globals()['sparkSessionSingletonInstance'] = SparkSession\
            .builder\
            .config(conf=sparkConf)\
            .getOrCreate()
    return globals()['sparkSessionSingletonInstance']

import requests
def send_df_to_dashboard(df):
    # extract the hashtags from dataframe and convert them into array
    top_tags = [str(t.hashtag) for t in df.select("hashtag").collect()]
    # extract the counts from dataframe and convert them into array
    tags_count = [p.hashtag_count for p in df.select("hashtag_count").collect()]
    # initialize and send the data through REST API
    url = 'http://' + HOST + ':5001/updateData'
    request_data = {'label': str(top_tags), 'data': str(tags_count)}
    #print(request_data)
    response = requests.post(url, data=request_data)

In [3]:
def process_rdd(time, rdd):
    #clear_output()
    print("----------- %s -----------" % str(time))
    try:
        sql_context = getSparkSessionInstance(rdd.context.getConf())
        # convert the RDD to Row RDD
        row_rdd = rdd.map(lambda w: pyspark.sql.Row(hashtag=w[0], hashtag_count=w[1]))
        # create a DF from the Row RDD
        hashtags_df = sql_context.createDataFrame(row_rdd, ("hashtag","hashtag_count"))
        # Register the dataframe as table
        hashtags_df.createOrReplaceTempView("hashtags")
        #print("Finish creating table ...")
        # get the top 10 hashtags from the table using SQL and print them
        sql_stmt = "select hashtag, hashtag_count from hashtags order by hashtag_count desc limit 10"
        hashtag_counts_df = sql_context.sql(sql_stmt)
        # call this method to prepare top 10 hashtags DF and send them
        send_df_to_dashboard(hashtag_counts_df)
    except:
        e = sys.exc_info()[1]
        print(e)

In [None]:
lines = ssc.socketTextStream(HOST, PORT)
words = lines.flatMap(lambda line: line.split(" "))

hashtags = words.filter(lambda w: '#' in w).map(lambda w: (w, 1))
tags_totals = hashtags.updateStateByKey(aggregate_tags_count)
tags_totals.foreachRDD(process_rdd)

ssc.start()
ssc.awaitTermination()

----------- 2020-01-16 18:57:02 -----------
RDD is empty
----------- 2020-01-16 18:57:03 -----------
RDD is empty
----------- 2020-01-16 18:57:04 -----------
RDD is empty
----------- 2020-01-16 18:57:05 -----------
RDD is empty
----------- 2020-01-16 18:57:06 -----------
RDD is empty
----------- 2020-01-16 18:57:07 -----------
RDD is empty
----------- 2020-01-16 18:57:08 -----------
RDD is empty
----------- 2020-01-16 18:57:09 -----------
RDD is empty
----------- 2020-01-16 18:57:10 -----------
RDD is empty
----------- 2020-01-16 18:57:11 -----------
RDD is empty
----------- 2020-01-16 18:57:12 -----------
RDD is empty
----------- 2020-01-16 18:57:13 -----------
RDD is empty
----------- 2020-01-16 18:57:14 -----------
RDD is empty
----------- 2020-01-16 18:57:15 -----------
RDD is empty
----------- 2020-01-16 18:57:16 -----------
RDD is empty
----------- 2020-01-16 18:57:17 -----------
RDD is empty
----------- 2020-01-16 18:57:18 -----------
RDD is empty
----------- 2020-01-16 18:57:19

----------- 2020-01-16 18:59:47 -----------
----------- 2020-01-16 18:59:48 -----------
----------- 2020-01-16 18:59:49 -----------
----------- 2020-01-16 18:59:50 -----------
----------- 2020-01-16 18:59:51 -----------
----------- 2020-01-16 18:59:52 -----------
----------- 2020-01-16 18:59:53 -----------
----------- 2020-01-16 18:59:54 -----------
----------- 2020-01-16 18:59:55 -----------
----------- 2020-01-16 18:59:56 -----------
----------- 2020-01-16 18:59:57 -----------
----------- 2020-01-16 18:59:58 -----------
----------- 2020-01-16 18:59:59 -----------
----------- 2020-01-16 19:00:00 -----------
----------- 2020-01-16 19:00:01 -----------
----------- 2020-01-16 19:00:02 -----------
----------- 2020-01-16 19:00:03 -----------
----------- 2020-01-16 19:00:04 -----------
----------- 2020-01-16 19:00:05 -----------
----------- 2020-01-16 19:00:06 -----------
----------- 2020-01-16 19:00:07 -----------
----------- 2020-01-16 19:00:08 -----------
----------- 2020-01-16 19:00:09 

----------- 2020-01-16 19:02:54 -----------
----------- 2020-01-16 19:02:55 -----------
----------- 2020-01-16 19:02:56 -----------
----------- 2020-01-16 19:02:57 -----------
----------- 2020-01-16 19:02:58 -----------
----------- 2020-01-16 19:02:59 -----------
----------- 2020-01-16 19:03:00 -----------
----------- 2020-01-16 19:03:01 -----------
----------- 2020-01-16 19:03:02 -----------
----------- 2020-01-16 19:03:03 -----------
----------- 2020-01-16 19:03:04 -----------
----------- 2020-01-16 19:03:05 -----------
----------- 2020-01-16 19:03:06 -----------
----------- 2020-01-16 19:03:07 -----------
----------- 2020-01-16 19:03:08 -----------
----------- 2020-01-16 19:03:09 -----------
----------- 2020-01-16 19:03:10 -----------
----------- 2020-01-16 19:03:11 -----------
----------- 2020-01-16 19:03:12 -----------
----------- 2020-01-16 19:03:13 -----------
----------- 2020-01-16 19:03:14 -----------
----------- 2020-01-16 19:03:15 -----------
----------- 2020-01-16 19:03:16 

----------- 2020-01-16 19:06:01 -----------
----------- 2020-01-16 19:06:02 -----------
----------- 2020-01-16 19:06:03 -----------
----------- 2020-01-16 19:06:04 -----------
----------- 2020-01-16 19:06:05 -----------
----------- 2020-01-16 19:06:06 -----------
----------- 2020-01-16 19:06:07 -----------
----------- 2020-01-16 19:06:08 -----------
----------- 2020-01-16 19:06:09 -----------
----------- 2020-01-16 19:06:10 -----------
----------- 2020-01-16 19:06:11 -----------
----------- 2020-01-16 19:06:12 -----------
----------- 2020-01-16 19:06:13 -----------
----------- 2020-01-16 19:06:14 -----------
----------- 2020-01-16 19:06:15 -----------
----------- 2020-01-16 19:06:16 -----------
----------- 2020-01-16 19:06:17 -----------
----------- 2020-01-16 19:06:18 -----------
----------- 2020-01-16 19:06:19 -----------
----------- 2020-01-16 19:06:20 -----------
----------- 2020-01-16 19:06:21 -----------
----------- 2020-01-16 19:06:22 -----------
----------- 2020-01-16 19:06:23 

----------- 2020-01-16 19:09:08 -----------
----------- 2020-01-16 19:09:09 -----------
----------- 2020-01-16 19:09:10 -----------
----------- 2020-01-16 19:09:11 -----------
----------- 2020-01-16 19:09:12 -----------
----------- 2020-01-16 19:09:13 -----------
----------- 2020-01-16 19:09:14 -----------
----------- 2020-01-16 19:09:15 -----------
----------- 2020-01-16 19:09:16 -----------
----------- 2020-01-16 19:09:17 -----------
----------- 2020-01-16 19:09:18 -----------
----------- 2020-01-16 19:09:19 -----------
----------- 2020-01-16 19:09:20 -----------
----------- 2020-01-16 19:09:21 -----------
----------- 2020-01-16 19:09:22 -----------
----------- 2020-01-16 19:09:23 -----------
----------- 2020-01-16 19:09:24 -----------
----------- 2020-01-16 19:09:25 -----------
----------- 2020-01-16 19:09:26 -----------
----------- 2020-01-16 19:09:27 -----------
----------- 2020-01-16 19:09:28 -----------
----------- 2020-01-16 19:09:29 -----------
----------- 2020-01-16 19:09:30 

----------- 2020-01-16 19:12:15 -----------
----------- 2020-01-16 19:12:16 -----------
----------- 2020-01-16 19:12:17 -----------
----------- 2020-01-16 19:12:18 -----------
----------- 2020-01-16 19:12:19 -----------
----------- 2020-01-16 19:12:20 -----------
----------- 2020-01-16 19:12:21 -----------
----------- 2020-01-16 19:12:22 -----------
----------- 2020-01-16 19:12:23 -----------
----------- 2020-01-16 19:12:24 -----------
----------- 2020-01-16 19:12:25 -----------
----------- 2020-01-16 19:12:26 -----------
----------- 2020-01-16 19:12:27 -----------
----------- 2020-01-16 19:12:28 -----------
----------- 2020-01-16 19:12:29 -----------
----------- 2020-01-16 19:12:30 -----------
----------- 2020-01-16 19:12:31 -----------
----------- 2020-01-16 19:12:32 -----------
----------- 2020-01-16 19:12:33 -----------
----------- 2020-01-16 19:12:34 -----------
----------- 2020-01-16 19:12:35 -----------
----------- 2020-01-16 19:12:36 -----------
----------- 2020-01-16 19:12:37 

----------- 2020-01-16 19:15:22 -----------
----------- 2020-01-16 19:15:23 -----------
----------- 2020-01-16 19:15:24 -----------
----------- 2020-01-16 19:15:25 -----------
----------- 2020-01-16 19:15:26 -----------
----------- 2020-01-16 19:15:27 -----------
----------- 2020-01-16 19:15:28 -----------
----------- 2020-01-16 19:15:29 -----------
----------- 2020-01-16 19:15:30 -----------
----------- 2020-01-16 19:15:31 -----------
----------- 2020-01-16 19:15:32 -----------
----------- 2020-01-16 19:15:33 -----------
----------- 2020-01-16 19:15:34 -----------
----------- 2020-01-16 19:15:35 -----------
----------- 2020-01-16 19:15:36 -----------
----------- 2020-01-16 19:15:37 -----------
----------- 2020-01-16 19:15:38 -----------
----------- 2020-01-16 19:15:39 -----------
----------- 2020-01-16 19:15:40 -----------
----------- 2020-01-16 19:15:41 -----------
----------- 2020-01-16 19:15:42 -----------
----------- 2020-01-16 19:15:43 -----------
----------- 2020-01-16 19:15:44 

----------- 2020-01-16 19:18:29 -----------
----------- 2020-01-16 19:18:30 -----------
----------- 2020-01-16 19:18:31 -----------
----------- 2020-01-16 19:18:32 -----------
----------- 2020-01-16 19:18:33 -----------
----------- 2020-01-16 19:18:34 -----------
----------- 2020-01-16 19:18:35 -----------
----------- 2020-01-16 19:18:36 -----------
----------- 2020-01-16 19:18:37 -----------
----------- 2020-01-16 19:18:38 -----------
----------- 2020-01-16 19:18:39 -----------
----------- 2020-01-16 19:18:40 -----------
----------- 2020-01-16 19:18:41 -----------
----------- 2020-01-16 19:18:42 -----------
----------- 2020-01-16 19:18:43 -----------
----------- 2020-01-16 19:18:44 -----------
----------- 2020-01-16 19:18:45 -----------
----------- 2020-01-16 19:18:46 -----------
----------- 2020-01-16 19:18:47 -----------
----------- 2020-01-16 19:18:48 -----------
----------- 2020-01-16 19:18:49 -----------
----------- 2020-01-16 19:18:50 -----------
----------- 2020-01-16 19:18:51 

----------- 2020-01-16 19:21:36 -----------
----------- 2020-01-16 19:21:37 -----------
----------- 2020-01-16 19:21:38 -----------
----------- 2020-01-16 19:21:39 -----------
----------- 2020-01-16 19:21:40 -----------
----------- 2020-01-16 19:21:41 -----------
----------- 2020-01-16 19:21:42 -----------
----------- 2020-01-16 19:21:43 -----------
----------- 2020-01-16 19:21:44 -----------
----------- 2020-01-16 19:21:45 -----------
----------- 2020-01-16 19:21:46 -----------
----------- 2020-01-16 19:21:47 -----------
----------- 2020-01-16 19:21:48 -----------
----------- 2020-01-16 19:21:49 -----------
----------- 2020-01-16 19:21:50 -----------
----------- 2020-01-16 19:21:51 -----------
----------- 2020-01-16 19:21:52 -----------
----------- 2020-01-16 19:21:53 -----------
----------- 2020-01-16 19:21:54 -----------
----------- 2020-01-16 19:21:55 -----------
----------- 2020-01-16 19:21:56 -----------
----------- 2020-01-16 19:21:57 -----------
----------- 2020-01-16 19:21:58 

----------- 2020-01-16 19:24:43 -----------
----------- 2020-01-16 19:24:44 -----------
----------- 2020-01-16 19:24:45 -----------
----------- 2020-01-16 19:24:46 -----------
----------- 2020-01-16 19:24:47 -----------
----------- 2020-01-16 19:24:48 -----------
----------- 2020-01-16 19:24:49 -----------
----------- 2020-01-16 19:24:50 -----------
----------- 2020-01-16 19:24:51 -----------
----------- 2020-01-16 19:24:52 -----------
----------- 2020-01-16 19:24:53 -----------
----------- 2020-01-16 19:24:54 -----------
----------- 2020-01-16 19:24:55 -----------
----------- 2020-01-16 19:24:56 -----------
----------- 2020-01-16 19:24:57 -----------
----------- 2020-01-16 19:24:58 -----------
----------- 2020-01-16 19:24:59 -----------
----------- 2020-01-16 19:25:00 -----------
----------- 2020-01-16 19:25:01 -----------
----------- 2020-01-16 19:25:02 -----------
----------- 2020-01-16 19:25:03 -----------
----------- 2020-01-16 19:25:04 -----------
----------- 2020-01-16 19:25:05 

----------- 2020-01-16 19:27:50 -----------
----------- 2020-01-16 19:27:51 -----------
----------- 2020-01-16 19:27:52 -----------
----------- 2020-01-16 19:27:53 -----------
----------- 2020-01-16 19:27:54 -----------
----------- 2020-01-16 19:27:55 -----------
----------- 2020-01-16 19:27:56 -----------
----------- 2020-01-16 19:27:57 -----------
----------- 2020-01-16 19:27:58 -----------
----------- 2020-01-16 19:27:59 -----------
----------- 2020-01-16 19:28:00 -----------
----------- 2020-01-16 19:28:01 -----------
----------- 2020-01-16 19:28:02 -----------
----------- 2020-01-16 19:28:03 -----------
----------- 2020-01-16 19:28:04 -----------
----------- 2020-01-16 19:28:05 -----------
----------- 2020-01-16 19:28:06 -----------
----------- 2020-01-16 19:28:07 -----------
----------- 2020-01-16 19:28:08 -----------
----------- 2020-01-16 19:28:09 -----------
----------- 2020-01-16 19:28:10 -----------
----------- 2020-01-16 19:28:11 -----------
----------- 2020-01-16 19:28:12 

----------- 2020-01-16 19:30:57 -----------
----------- 2020-01-16 19:30:58 -----------
----------- 2020-01-16 19:30:59 -----------
----------- 2020-01-16 19:31:00 -----------
----------- 2020-01-16 19:31:01 -----------
----------- 2020-01-16 19:31:02 -----------
----------- 2020-01-16 19:31:03 -----------
----------- 2020-01-16 19:31:04 -----------
----------- 2020-01-16 19:31:05 -----------
----------- 2020-01-16 19:31:06 -----------
----------- 2020-01-16 19:31:07 -----------
----------- 2020-01-16 19:31:08 -----------
----------- 2020-01-16 19:31:09 -----------
----------- 2020-01-16 19:31:10 -----------
----------- 2020-01-16 19:31:11 -----------
----------- 2020-01-16 19:31:12 -----------
----------- 2020-01-16 19:31:13 -----------
----------- 2020-01-16 19:31:14 -----------
----------- 2020-01-16 19:31:15 -----------
----------- 2020-01-16 19:31:16 -----------
----------- 2020-01-16 19:31:17 -----------
----------- 2020-01-16 19:31:18 -----------
----------- 2020-01-16 19:31:19 

----------- 2020-01-16 19:34:04 -----------
----------- 2020-01-16 19:34:05 -----------
----------- 2020-01-16 19:34:06 -----------
----------- 2020-01-16 19:34:07 -----------
----------- 2020-01-16 19:34:08 -----------
----------- 2020-01-16 19:34:09 -----------
----------- 2020-01-16 19:34:10 -----------
----------- 2020-01-16 19:34:11 -----------
----------- 2020-01-16 19:34:12 -----------
----------- 2020-01-16 19:34:13 -----------
----------- 2020-01-16 19:34:14 -----------
----------- 2020-01-16 19:34:15 -----------
----------- 2020-01-16 19:34:16 -----------
----------- 2020-01-16 19:34:17 -----------
----------- 2020-01-16 19:34:18 -----------
----------- 2020-01-16 19:34:19 -----------
----------- 2020-01-16 19:34:20 -----------
----------- 2020-01-16 19:34:21 -----------
----------- 2020-01-16 19:34:22 -----------
----------- 2020-01-16 19:34:23 -----------
----------- 2020-01-16 19:34:24 -----------
----------- 2020-01-16 19:34:25 -----------
----------- 2020-01-16 19:34:26 

----------- 2020-01-16 19:37:11 -----------
----------- 2020-01-16 19:37:12 -----------
----------- 2020-01-16 19:37:13 -----------
----------- 2020-01-16 19:37:14 -----------
----------- 2020-01-16 19:37:15 -----------
----------- 2020-01-16 19:37:16 -----------
----------- 2020-01-16 19:37:17 -----------
----------- 2020-01-16 19:37:18 -----------
----------- 2020-01-16 19:37:19 -----------
----------- 2020-01-16 19:37:20 -----------
----------- 2020-01-16 19:37:21 -----------
----------- 2020-01-16 19:37:22 -----------
----------- 2020-01-16 19:37:23 -----------
----------- 2020-01-16 19:37:24 -----------
----------- 2020-01-16 19:37:25 -----------
----------- 2020-01-16 19:37:26 -----------
----------- 2020-01-16 19:37:27 -----------
----------- 2020-01-16 19:37:28 -----------
----------- 2020-01-16 19:37:29 -----------
----------- 2020-01-16 19:37:30 -----------
----------- 2020-01-16 19:37:31 -----------
----------- 2020-01-16 19:37:32 -----------
----------- 2020-01-16 19:37:33 

----------- 2020-01-16 19:40:18 -----------
----------- 2020-01-16 19:40:19 -----------
----------- 2020-01-16 19:40:20 -----------
----------- 2020-01-16 19:40:21 -----------
----------- 2020-01-16 19:40:22 -----------
----------- 2020-01-16 19:40:23 -----------
----------- 2020-01-16 19:40:24 -----------
----------- 2020-01-16 19:40:25 -----------
----------- 2020-01-16 19:40:26 -----------
----------- 2020-01-16 19:40:27 -----------
----------- 2020-01-16 19:40:28 -----------
----------- 2020-01-16 19:40:29 -----------
----------- 2020-01-16 19:40:30 -----------
----------- 2020-01-16 19:40:31 -----------
----------- 2020-01-16 19:40:32 -----------
----------- 2020-01-16 19:40:33 -----------
----------- 2020-01-16 19:40:34 -----------
----------- 2020-01-16 19:40:35 -----------
----------- 2020-01-16 19:40:36 -----------
----------- 2020-01-16 19:40:37 -----------
----------- 2020-01-16 19:40:38 -----------
----------- 2020-01-16 19:40:39 -----------
----------- 2020-01-16 19:40:40 

----------- 2020-01-16 19:43:25 -----------
----------- 2020-01-16 19:43:26 -----------
----------- 2020-01-16 19:43:27 -----------
----------- 2020-01-16 19:43:28 -----------
----------- 2020-01-16 19:43:29 -----------
----------- 2020-01-16 19:43:30 -----------
----------- 2020-01-16 19:43:31 -----------
----------- 2020-01-16 19:43:32 -----------
----------- 2020-01-16 19:43:33 -----------
----------- 2020-01-16 19:43:34 -----------
----------- 2020-01-16 19:43:35 -----------
----------- 2020-01-16 19:43:36 -----------
----------- 2020-01-16 19:43:37 -----------
----------- 2020-01-16 19:43:38 -----------
----------- 2020-01-16 19:43:39 -----------
----------- 2020-01-16 19:43:40 -----------
----------- 2020-01-16 19:43:41 -----------
----------- 2020-01-16 19:43:42 -----------
----------- 2020-01-16 19:43:43 -----------
----------- 2020-01-16 19:43:44 -----------
----------- 2020-01-16 19:43:45 -----------
----------- 2020-01-16 19:43:46 -----------
----------- 2020-01-16 19:43:47 

----------- 2020-01-16 19:46:32 -----------
----------- 2020-01-16 19:46:33 -----------
----------- 2020-01-16 19:46:34 -----------
----------- 2020-01-16 19:46:35 -----------
----------- 2020-01-16 19:46:36 -----------
----------- 2020-01-16 19:46:37 -----------
----------- 2020-01-16 19:46:38 -----------
----------- 2020-01-16 19:46:39 -----------
----------- 2020-01-16 19:46:40 -----------
----------- 2020-01-16 19:46:41 -----------
----------- 2020-01-16 19:46:42 -----------
----------- 2020-01-16 19:46:43 -----------
----------- 2020-01-16 19:46:44 -----------
----------- 2020-01-16 19:46:45 -----------
----------- 2020-01-16 19:46:46 -----------
----------- 2020-01-16 19:46:47 -----------
----------- 2020-01-16 19:46:48 -----------
----------- 2020-01-16 19:46:49 -----------
----------- 2020-01-16 19:46:50 -----------
----------- 2020-01-16 19:46:51 -----------
----------- 2020-01-16 19:46:52 -----------
----------- 2020-01-16 19:46:53 -----------
----------- 2020-01-16 19:46:54 

----------- 2020-01-16 19:49:39 -----------
----------- 2020-01-16 19:49:40 -----------
----------- 2020-01-16 19:49:41 -----------
----------- 2020-01-16 19:49:42 -----------
----------- 2020-01-16 19:49:43 -----------
----------- 2020-01-16 19:49:44 -----------


In [None]:
ssc.stop()
sc.stop()