In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext
from pyspark.streaming import StreamingContext
from Activity import Activity
from activityByProduct import activityByProduct
from pyspark.streaming.kafka import KafkaUtils
import os
import time
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from pyspark.sql.types import *

##### Environment setup

In [2]:
# Arguments handed to spark-submit when the PySpark gateway is launched:
# the UI port, the Kafka 0.8 streaming and Cassandra connector packages,
# and the mandatory trailing `pyspark-shell` token.
os.environ['PYSPARK_SUBMIT_ARGS'] = (
    '--conf spark.ui.port=4040 '
    '--packages '
    'org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.0,'
    'com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M3 '
    'pyspark-shell'
)

In [3]:
# Spark configuration for this notebook: application name, a local master
# with two threads, and the Cassandra host used by the connector.
conf = (
    SparkConf()
    .setAppName("Streaming test")
    .setMaster("local[2]")
    .set("spark.cassandra.connection.host", "127.0.0.1")
)

# getOrCreate() reuses the context that is already running when this cell is
# executed again in the same kernel; calling SparkContext(conf=conf) a second
# time would raise because only one SparkContext may be active at once.
sc = SparkContext.getOrCreate(conf=conf)

# SQL entry point used by the transformation cell to run Spark SQL queries.
sqlcontext = SQLContext(sc)

In [4]:
# Micro-batch interval (in seconds) used by the streaming context.
BATCH_INTERVAL_SECONDS = 2
ssc = StreamingContext(sc, BATCH_INTERVAL_SECONDS)

##### Kafka with Spark Streaming

In [5]:
# Receiver-based Kafka stream: connect through ZooKeeper on localhost:2181 as
# consumer group 'spark-streaming' and read topic 'data_pipeline' with one
# consumer thread.
textDStream1 = KafkaUtils.createStream(
    ssc,
    zkQuorum='localhost:2181',
    groupId='spark-streaming',
    topics={'data_pipeline': 1},
)

# Each Kafka record arrives as a (key, value) pair; keep only the value.
textDStream = textDStream1.map(lambda key_value: key_value[1])

##### Streaming

In [6]:
def subprocess(line):
    """Parse one tab-separated line into an Activity record.

    Args:
        line: A single text line whose fields are separated by tabs.

    Returns:
        An Activity built from the seven fields, in order, or None when the
        line does not have exactly seven fields or its first field is the
        literal "action" (the header row).
    """
    fields = line.split("\t")
    # Reject header rows and anything that is not exactly seven columns wide.
    if len(fields) != 7 or fields[0] == "action":
        return None
    return Activity(*fields)

In [7]:
def process(rdd):
    """Turn one micro-batch of raw text lines into parsed records.

    Args:
        rdd: RDD of raw tab-separated lines for the current batch.

    Returns:
        None when the batch is empty; otherwise an RDD holding the result of
        `subprocess` for every line that parsed successfully.
    """
    if rdd.isEmpty():
        return None
    # `subprocess` returns None for header rows and malformed lines. Drop
    # those here so downstream DataFrame creation never sees a None record
    # (which would otherwise surface as an all-null row or break schema
    # inference when it is the first record of the batch).
    return rdd.map(subprocess).filter(lambda record: record is not None)

#### Transformation

In [8]:
def transformation(inputDF):
    """Compute the per-product aggregates for one batch of activity rows.

    Registers `inputDF` as the temporary view "Activity" and runs two Spark
    SQL queries against it.

    Args:
        inputDF: DataFrame with at least the columns product, timestamp_hour,
            visitor and action.

    Returns:
        A 2-tuple `(visitors, activity)`:
        - visitors: distinct visitor count per (product, timestamp_hour);
        - activity: purchase / add_to_cart / page_view counts per
          (product, timestamp_hour), marked for caching.
    """
    inputDF.createOrReplaceTempView("Activity")

    visitors_sql = (
        "SELECT product, timestamp_hour, COUNT(DISTINCT visitor) as unique_visitors "
        "FROM Activity GROUP BY product, timestamp_hour"
    )
    activity_sql = (
        "SELECT product, timestamp_hour, "
        "sum(case when action = 'purchase' then 1 else 0 end) as purchase_count, "
        "sum(case when action = 'add_to_cart' then 1 else 0 end) as add_to_cart_count, "
        "sum(case when action = 'page_view' then 1 else 0 end) as page_view_count "
        "from Activity group by product, timestamp_hour"
    )

    visitors_df = sqlcontext.sql(visitors_sql)
    activity_df = sqlcontext.sql(activity_sql).cache()
    return visitors_df, activity_df
    

In [9]:
def timestamp_conversion(timestamp):
    """Convert an epoch timestamp in milliseconds to a UTC date string.

    Args:
        timestamp: Epoch milliseconds as an int or a numeric string, or None.

    Returns:
        The UTC calendar date formatted as ``%Y-%m-%d``. The sentinel string
        ``"null null"`` is returned when `timestamp` is None or cannot be
        interpreted as a timestamp (empty string, non-numeric text, value out
        of range), so a single bad value does not fail the whole UDF task.
    """
    if timestamp is None:
        return "null null"
    try:
        # Milliseconds -> whole seconds before handing the value to gmtime().
        seconds = int(timestamp) // 1000
        return time.strftime('%Y-%m-%d', time.gmtime(seconds))
    except (TypeError, ValueError, OverflowError, OSError):
        return "null null"

#### UDF for Time Conversion

In [10]:
# Expose timestamp_conversion to Spark SQL as a column function that yields
# string values.
time_udf = udf(timestamp_conversion, returnType=StringType())

#### Send streaming data to both HDFS and Cassandra (Cassandra writes are currently commented out)

In [11]:
def process_query(rdd):
    """Persist one micro-batch of parsed records and compute its aggregates.

    For a non-empty batch this converts the records to a DataFrame, rewrites
    `timestamp_hour` through `time_udf`, prints the schema and a 10-row
    sample, appends the rows to HDFS as CSV partitioned by `timestamp_hour`,
    registers the temp view "activity", and calls `transformation` on the
    un-converted DataFrame. The Cassandra writes for the aggregates are
    currently commented out, so the aggregates are computed lazily and not
    stored.

    Args:
        rdd: RDD of parsed records for the current batch.

    Returns:
        None. Empty batches are skipped.
    """
    if rdd.isEmpty():
        return

    # Wrap each record in a 1-tuple so toDF() yields a single struct column
    # `_1`, then expand that struct into one top-level column per field.
    wrappedDF = rdd.map(lambda x: (x, )).toDF()
    newDF2 = wrappedDF.select('_1.*')

    inputDF = newDF2.withColumn("timestamp_hour", time_udf(newDF2["timestamp_hour"]))

    # printSchema() and show() write to stdout themselves and return None;
    # wrapping them in print() only added a stray "None" line to the output.
    inputDF.printSchema()
    inputDF.show(10, truncate=False)
    inputDF.write.partitionBy("timestamp_hour").mode("append").csv("hdfs://localhost:9000/kafka_spark", header=True)

    inputDF.createOrReplaceTempView("activity")
    inputDF.show(10, truncate=False)

    # Local names deliberately differ from the imported `activityByProduct`
    # so the import is not shadowed inside this function.
    visitors_df, activity_df = transformation(newDF2)
    # Cassandra sinks (disabled):
    # visitors_df.write.format("org.apache.spark.sql.cassandra").mode('append').options(table="stream_visitors_by_product", keyspace="lambda").save()
    # activity_df.write.format("org.apache.spark.sql.cassandra").mode('append').options(table="stream_activity_by_product", keyspace="lambda").save()

###### Start Streaming...

In [12]:
# Apply `process` to the RDD of every micro-batch, producing a DStream of
# parsed records.
newDStream = textDStream.transform(process)

In [13]:
# Register `process_query` as the output operation run on each batch's RDD.
newDStream.foreachRDD(process_query)

In [14]:
# Start the streaming computation. No awaitTermination() call follows, so the
# output printed by process_query shows up as batches are processed; use the
# stop cell below to shut the stream down.
ssc.start()

##### Stop Streaming

In [15]:
# ssc.stop(stopSparkContext=False,stopGraceFully=True)

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+-------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                          |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-3 |Page-9  |Farmland,Lower Sodium Bacon                      |Internal|2018-03-26    |Visitor-95788 |
|add_to_cart|Page-7 |        |Scrabble,Crossword Game                          |Google  |2018-03-26    |Visitor-897018|
|page_view  |Page-7 |        |Comet,Comet With Bleach                          |Yahoo   |2018-03-26    |Visitor

+-----------+------+--------+--------------------------------------------------------+--------+--------------+--------------+
|action     |page  |prevPage|product                                                 |referrer|timestamp_hour|visitor       |
+-----------+------+--------+--------------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-6|        |Clif Bar,Chocolate Almond Fudge Energy Bar              |Other   |2018-03-26    |Visitor-820160|
|add_to_cart|Page-3|        |Glade,Clean Linen Air Freshener                         |Twitter |2018-03-26    |Visitor-759727|
|page_view  |Page-8|        |Nature Made,D3 400 IU                                   |Facebook|2018-03-26    |Visitor-383408|
|page_view  |Page-0|        |Ralphs,Plastic Wrap                                     |Facebook|2018-03-26    |Visitor-831022|
|page_view  |Page-7|        |Banana Boat,Sport Performance- UVA/UVB protection SPF 30|Bing    |2018-03-26    |Visitor-

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+-----------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                  |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-----------------------------------------+--------+--------------+--------------+
|purchase   |Page-2 |        |Siggi's,"Icelandic Style Skyr, Blueberry"|Other   |2018-03-26    |Visitor-886143|
|add_to_cart|Page-0 |Page-2  |Gnarly Head,Cabernet Sauvignon           |Internal|2018-03-26    |Visitor-141312|
|page_view  |Page-3 |        |Menscience,Advanced Deodorant            |Twitter |2018-03-26    |Visitor-762450|
|page_view  |Page-0 |        |Pond's,Dr

+---------+-------+--------+------------------------------------------------+--------+--------------+--------------+
|action   |page   |prevPage|product                                         |referrer|timestamp_hour|visitor       |
+---------+-------+--------+------------------------------------------------+--------+--------------+--------------+
|purchase |Page-12|        |Nestle,Leche Condensada                         |Facebook|2018-03-26    |Visitor-192633|
|page_view|Page-4 |        |e.l.f.,Eyebrow Kit- Darl                        |Other   |2018-03-26    |Visitor-622935|
|page_view|Page-8 |        |Garnier Fructis Style,Pure Clean Finishing Paste|Facebook|2018-03-26    |Visitor-529811|
|page_view|Page-8 |        |Spectrum Naturals,Peanut Oil                    |Twitter |2018-03-26    |Visitor-237542|
|page_view|Page-7 |        |Tropicana,Orange Juice with Calcium & Vitamin D |Other   |2018-03-26    |Visitor-768365|
|page_view|Page-7 |        |Biotene,Dry Mouth  Oral Rinse       

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                       |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-12|        |Lysol,Bathroom Cleaner with Hydrogen Peroxide |Twitter |2018-03-26    |Visitor-475486|
|add_to_cart|Page-12|        |Knorr,Salsa Lista Pizza                       |Other   |2018-03-26    |Visitor-131373|
|page_view  |Page-2 |Page-8  |Jolen,Creme Bleach - Lightens Excess Dark Hair|Internal|2018-03-26    |Visitor-69951 |
|page_vie

+-----------+-------+--------+--------------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                                 |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+--------------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-3 |        |Nivea,Sensitive Men's Shaving Gel                       |Twitter |2018-03-26    |Visitor-967320|
|add_to_cart|Page-14|        |Pond's,Dry Skin Cream                                   |Bing    |2018-03-26    |Visitor-98042 |
|page_view  |Page-14|        |Meijer,Vitamin C 500 mg                                 |Twitter |2018-03-26    |Visitor-902442|
|page_view  |Page-14|        |Neutrogena,On-the-Spot Acne Treatment                   |Other   |2018-03-26    |Visitor-502764|
|page_view  |Page-5 |        |Banquet,Brown 'N Serve Original Sausage Links           |Bing    |2018-03-26    |

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                       |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-10|        |Barilla,Rotini                                |Twitter |2018-03-26    |Visitor-749682|
|add_to_cart|Page-0 |        |Bolla,Pinot Grigio                            |Google  |2018-03-26    |Visitor-410638|
|page_view  |Page-7 |        |Trader Joe's,Curry Powder                     |Other   |2018-03-26    |Visitor-529770|
|page_vie

+-----------+-------+--------+--------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                     |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+--------------------------------------------+--------+--------------+--------------+
|purchase   |Page-0 |        |CVS Pharmacy,91% Isopropyl Alcohol          |Other   |2018-03-26    |Visitor-836550|
|add_to_cart|Page-0 |        |Sol Republic,JAX Headphones                 |Twitter |2018-03-26    |Visitor-232579|
|page_view  |Page-13|        |Axe,Axe Anarchy for Her                     |Google  |2018-03-26    |Visitor-591479|
|page_view  |Page-3 |        |Crest,Pro-Health Clean Mint Toothpaste      |Other   |2018-03-26    |Visitor-352904|
|page_view  |Page-3 |        |Vicks,DayQuil Severe Cold & Flu             |Direct  |2018-03-26    |Visitor-502646|
|page_view  |Page-5 |        |Google,Chromecast                           |Other