In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext
from pyspark.streaming import StreamingContext
from Activity import Activity
from activityByProduct import activityByProduct
from pyspark.streaming.kafka import KafkaUtils
import os
import time
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from pyspark.sql.types import *

##### Environment setup

In [2]:
# Launch arguments for the PySpark JVM: pins the Spark UI port and pulls in the
# Kafka 0.8 streaming integration and the Cassandra connector via --packages.
# This has to be set before the SparkContext below is created to take effect.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--conf spark.ui.port=4040 --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.0,com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M3 pyspark-shell'

In [3]:
# Local two-thread Spark application that talks to a Cassandra node on localhost.
conf = (
    SparkConf()
    .setAppName("Streaming test")
    .setMaster("local[2]")
    .set("spark.cassandra.connection.host", "127.0.0.1")
)

# Shared contexts used by every cell below.
sc = SparkContext(conf=conf)
sqlcontext = SQLContext(sc)

In [4]:
# Streaming context on top of `sc`; the second argument is the batch interval (2 seconds).
ssc = StreamingContext(sc,2)

##### Kafka with Spark Streaming

In [5]:
# Receiver-based Kafka stream: ZooKeeper at localhost:2181, consumer group
# 'spark-streaming', reading topic 'data_pipeline' with 1 consumer thread.
textDStream1 = KafkaUtils.createStream(ssc, 'localhost:2181', 'spark-streaming', {'data_pipeline':1})
# Each element is a (key, value) pair; keep only the message value (the raw text line).
textDStream = textDStream1.map(lambda x: x[1])

##### Streaming

In [6]:
def subprocess(line):
    """Parse one tab-separated line into an Activity record.

    Returns None when the first field is the literal "action" (the header
    row) or when the line does not split into exactly 7 fields; otherwise
    returns Activity built from the 7 fields in order.

    Note: this function's name coincides with the standard-library
    `subprocess` module, which this notebook does not import.
    """
    fields = line.split("\t")
    is_header = fields[0] == "action"
    if is_header or len(fields) != 7:
        return None
    # Exactly 7 fields at this point, passed positionally in file order.
    return Activity(*fields)

In [7]:
def process(rdd):
    """Parse one micro-batch RDD of raw text lines into an RDD of records.

    Each line is parsed with subprocess(). Lines it rejects (it returns None
    for the header row and for lines without exactly 7 fields) are dropped
    here so that downstream DataFrame conversion never sees None rows.

    Args:
        rdd: RDD of tab-separated text lines for the current batch.

    Returns:
        An RDD of parsed records, or None when the incoming batch is empty
        (unchanged from the original behaviour).
    """
    if rdd.isEmpty():
        return None
    # Without this filter the None placeholders reach toDF()/select('_1.*')
    # in the output stage, which cannot build a row from them.
    return rdd.map(subprocess).filter(lambda record: record is not None)

#### Transformation

In [8]:
def transformation(inputDF):
    """Run the two per-product aggregations over an activity DataFrame.

    Registers `inputDF` as the temp view "Activity" and queries it through
    the module-level `sqlcontext`.

    Returns:
        A 2-tuple (visitors, activity):
        - visitors: distinct visitor count per (product, timestamp_hour).
        - activity: purchase / add_to_cart / page_view counts per
          (product, timestamp_hour); this DataFrame is marked with .cache().
    """
    visitors_sql = (
        "SELECT product, timestamp_hour, COUNT(DISTINCT visitor) as unique_visitors "
        "FROM Activity GROUP BY product, timestamp_hour"
    )
    activity_sql = (
        "SELECT product, timestamp_hour, "
        "sum(case when action = 'purchase' then 1 else 0 end) as purchase_count, "
        "sum(case when action = 'add_to_cart' then 1 else 0 end) as add_to_cart_count, "
        "sum(case when action = 'page_view' then 1 else 0 end) as page_view_count "
        "from Activity group by product, timestamp_hour"
    )

    # The view must exist before either query is issued.
    inputDF.createOrReplaceTempView("Activity")
    visitors = sqlcontext.sql(visitors_sql)
    activity = sqlcontext.sql(activity_sql).cache()
    return visitors, activity
    

In [9]:
def timestamp_conversion(timestamp):
    """Convert an epoch-milliseconds value to a UTC date string.

    Args:
        timestamp: epoch time in milliseconds, as an int or a numeric string;
            may be None.

    Returns:
        The UTC date formatted as 'YYYY-MM-DD', or the sentinel "null null"
        when `timestamp` is None or cannot be interpreted as a valid epoch
        value (empty string, non-numeric text, out-of-range number).
    """
    if timestamp is None:
        return "null null"
    try:
        # Floor division keeps the result identical on Python 2 and 3.
        epoch_seconds = int(timestamp) // 1000
        return time.strftime('%Y-%m-%d', time.gmtime(epoch_seconds))
    except (ValueError, TypeError, OverflowError, OSError):
        # A single malformed value must not fail the whole batch when this
        # runs as a UDF; fall back to the same sentinel used for None.
        return "null null"

#### UDF for Time Conversion

In [10]:
# Expose timestamp_conversion as a Spark SQL UDF that produces a string column.
time_udf = udf(timestamp_conversion,StringType())

#### Send streaming data to both HDFS and Cassandra

In [11]:
def process_query(rdd):
    """Per-batch output step: persist the batch to HDFS and aggregate it.

    Used as the callback passed to DStream.foreachRDD. It must keep a single
    positional parameter, since the callback is invoked with the batch RDD.

    For a non-empty batch this:
      1. converts the records to a DataFrame,
      2. replaces `timestamp_hour` with the date string produced by time_udf,
      3. prints the schema and a 10-row sample of the raw frame,
      4. appends the converted frame as CSV under
         hdfs://localhost:9000/kafka_spark, partitioned by `timestamp_hour`,
      5. prints a 10-row sample of the converted frame,
      6. runs transformation() on the converted frame.

    Args:
        rdd: RDD of parsed records for the current batch; None entries
            (rejected lines) are ignored.

    Returns:
        None.
    """
    # Rejected lines may arrive as None; a row cannot be built from them.
    records = rdd.filter(lambda record: record is not None)
    if records.isEmpty():
        return

    # Wrap each record in a 1-tuple so toDF() yields one struct column `_1`,
    # then expand that struct into top-level columns.
    rawDF = records.map(lambda x: (x, )).toDF().select('_1.*')
    inputDF = rawDF.withColumn("timestamp_hour", time_udf(rawDF["timestamp_hour"]))

    # printSchema()/show() write to stdout themselves and return None, so
    # they are called directly rather than wrapped in print().
    rawDF.printSchema()
    rawDF.show(10, truncate=False)
    inputDF.write.partitionBy("timestamp_hour").mode("append").csv("hdfs://localhost:9000/kafka_spark",header=True)

    inputDF.createOrReplaceTempView("activity")
    inputDF.show(10, truncate=False)

    # Aggregate on the converted frame: grouping on the raw epoch-millisecond
    # column would give one group per millisecond instead of per date.
    visitors_by_product, activity_counts = transformation(inputDF)
#         visitors_by_product.write.format("org.apache.spark.sql.cassandra").mode('append').options(table="stream_visitors_by_product", keyspace="lambda").save()
#         activity_counts.write.format("org.apache.spark.sql.cassandra").mode('append').options(table="stream_activity_by_product", keyspace="lambda").save()

###### Start Streaming...

In [12]:
# Apply process() to every micro-batch RDD of raw lines to get parsed records.
newDStream = textDStream.transform(process)

In [13]:
# Register process_query as the output operation executed for each batch.
newDStream.foreachRDD(process_query)

In [14]:
# Start the streaming computation; batch output is printed below as batches arrive.
# No awaitTermination() is called in this notebook.
ssc.start()

##### Stop Streaming

In [15]:
# ssc.stop(stopSparkContext=False,stopGraceFully=True)

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+----------------------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                                         |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-7 |        |Avery,White Easy Peel Return Address Labels                     |Twitter |1522067675777 |Visitor-525555|
|add_to_cart|Page-12|        |Mrs. Meyer's Clean Day,"Liquid Hand Soap Refill, Lavender Scent"|Twitter |1522067676737 |Visitor-399542|
|page_view  |Page-12|        |Honeyw

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+---------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+---------------------------------------+--------+--------------+--------------+
|purchase   |Page-7 |        |Charmin,Ultra Soft                     |Google  |1522067688218 |Visitor-676807|
|add_to_cart|Page-4 |        |Coop,Tiefkühlbeutel mit Druckverschluss|Google  |1522067688698 |Visitor-997945|
|page_view  |Page-2 |        |All,"Mighty Pacs, Free Clear"          |Direct  |1522067688698 |Visitor-277908|
|page_view  |Page-13|        |Safeway,Whole Black P

+-----------+-------+--------+--------------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                                 |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+--------------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-11|Page-5  |Charmin,Ultra Soft                                      |Internal|2018-03-26    |Visitor-477110|
|add_to_cart|Page-1 |        |Comet,Comet With Bleach                                 |Other   |2018-03-26    |Visitor-318236|
|page_view  |Page-1 |        |Sol Republic,JAX Headphones                             |Twitter |2018-03-26    |Visitor-512775|
|page_view  |Page-5 |        |CVS,2-Ply Tissues                                       |Other   |2018-03-26    |Visitor-914876|
|page_view  |Page-10|        |Banana Boat,Sport Performance- UVA/UVB protection SPF 30|Direct  |2018-03-26    |

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                       |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-10|        |Kroger,Vanilla Extract                        |Other   |1522067698528 |Visitor-892823|
|add_to_cart|Page-5 |Page-8  |Palmolive,Original Dish Liquid                |Internal|1522067699008 |Visitor-389881|
|page_view  |Page-11|        |l'Oreal,EverStrong Hydrate Conditioner        |Twitter |1522067699008 |Visitor-253713|
|page_vie

+-----------+-------+--------+-----------------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                                    |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-----------------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-10|Page-7  |Annie's,Shells & Real Aged Cheddar Macaroni & Cheese       |Internal|2018-03-26    |Visitor-323938|
|add_to_cart|Page-11|Page-4  |Crest,Pro-Health Clean Mint Toothpaste                     |Internal|2018-03-26    |Visitor-145154|
|page_view  |Page-7 |        |Santa Sweets,Grape Tomatoes                                |Direct  |2018-03-26    |Visitor-342720|
|page_view  |Page-7 |Page-4  |The Learning Company,Where in the U.S.A. is Carmen Sandiego|Internal|2018-03-26    |Visitor-731452|
|page_view  |Page-9 |        |Heinz,Tomato Ketchup                                       |

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+------+--------+----------------------------------------------+--------+--------------+--------------+
|action     |page  |prevPage|product                                       |referrer|timestamp_hour|visitor       |
+-----------+------+--------+----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-0|        |Orbit,Spearmint Sugarfree Gum                 |Yahoo   |1522067708844 |Visitor-657611|
|add_to_cart|Page-4|        |Dixie,Paper Plates                            |Facebook|1522067709324 |Visitor-753760|
|page_view  |Page-2|Page-11 |l'Oreal,EverStrong Hydrate Shampoo            |Internal|1522067709324 |Visitor-685650|
|page_view  |Pa

+-----------+-------+--------+----------------------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                                         |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-5 |Page-13 |CVS Total Home,"Aluminum Foil, Heavy Duty"                      |Internal|2018-03-26    |Visitor-719604|
|add_to_cart|Page-10|Page-10 |Walgreens,Advanced Hand Sanitizer with Aloe                     |Internal|2018-03-26    |Visitor-78980 |
|page_view  |Page-10|        |HDX,Table Top Air Circulator                                    |Yahoo   |2018-03-26    |Visitor-19051 |
|page_view  |Page-4 |        |Gillette,Sensor 3                                               |Yahoo   |2018-03-26    |Visitor-581564|
|page_view  |Page-7 |        |Mrs. Meyer's Clean Day,"L

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                       |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-7 |        |California Pizza Kitchen,Sicilian Recipe Pizza|Direct  |1522067719152 |Visitor-338540|
|add_to_cart|Page-6 |Page-11 |Duracell,9V Procell Batteries                 |Internal|1522067719632 |Visitor-486507|
|page_view  |Page-2 |        |Avery,White Easy Peel Return Address Labels   |Other   |1522067719632 |Visitor-516631|
|page_vie

+-----------+-------+--------+-------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                    |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-------------------------------------------+--------+--------------+--------------+
|purchase   |Page-1 |Page-13 |Scrabble,Crossword Game                    |Internal|2018-03-26    |Visitor-272562|
|add_to_cart|Page-1 |        |Scott,One Ply Bathroom Tissue              |Facebook|2018-03-26    |Visitor-44331 |
|page_view  |Page-9 |        |Andersen's,Creamy Soup Split Pea           |Google  |2018-03-26    |Visitor-242789|
|page_view  |Page-0 |        |So Delicious,Coconut Milk                  |Other   |2018-03-26    |Visitor-341836|
|page_view  |Page-11|Page-6  |Purell,Advanced Hand Sanitizer -travel size|Internal|2018-03-26    |Visitor-316330|
|page_view  |Page-11|        |l'Oreal,EverStrong Hydrate Conditioner     |Bing    |2018-

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+-----------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                  |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-----------------------------------------+--------+--------------+--------------+
|purchase   |Page-8 |        |Secret,pH Balanced - Unscented           |Google  |1522067729423 |Visitor-692029|
|add_to_cart|Page-6 |        |Trader Joe's,Extra Virgin Olive Oil      |Yahoo   |1522067729423 |Visitor-513908|
|page_view  |Page-11|        |O-Live And Company,Extra Virgin Olive Oil|Yahoo   |1522067729423 |Visitor-474945|
|page_view  |Page-13|        |Safeway,W

+-----------+-------+--------+------------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                         |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+------------------------------------------------+--------+--------------+--------------+
|purchase   |Page-6 |        |Hot Shot,Flying Insect Killer                   |Twitter |2018-03-26    |Visitor-231170|
|add_to_cart|Page-11|        |Avery,Heavy-Duty EZD Reference 3-Ring Binder    |Facebook|2018-03-26    |Visitor-510384|
|page_view  |Page-6 |        |Natural Grooming by Herban Cowboy,Dusk Deodorant|Yahoo   |2018-03-26    |Visitor-536675|
|page_view  |Page-4 |        |Harney & Sons,Spiced Black Holiday Tea          |Bing    |2018-03-26    |Visitor-491326|
|page_view  |Page-7 |        |Neutrogena,Alcohol-Free Toner                   |Yahoo   |2018-03-26    |Visitor-100368|
|page_view  |Page-9 |        |Duracell,76A Dural

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                       |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-13|        |David's Tea,Chocolate Chili Chai Black Tea    |Direct  |1522067739664 |Visitor-16890 |
|add_to_cart|Page-9 |        |Botanics,Hydrating Day Cream All Bright SPF 15|Yahoo   |1522067740144 |Visitor-757528|
|page_view  |Page-0 |        |CVS Total Home,"Aluminum Foil, Heavy Duty"    |Yahoo   |1522067740144 |Visitor-934899|
|page_vie

+-----------+-------+--------+-----------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                        |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-----------------------------------------------+--------+--------------+--------------+
|purchase   |Page-9 |Page-1  |Charmin,Ultra Soft                             |Internal|2018-03-26    |Visitor-886612|
|add_to_cart|Page-12|        |aussie,Instant Freeze Gel                      |Twitter |2018-03-26    |Visitor-660641|
|page_view  |Page-10|        |Crest,Pro-Health Fluoride Rinse                |Yahoo   |2018-03-26    |Visitor-574738|
|page_view  |Page-10|        |Reynolds,Parchment Paper                       |Other   |2018-03-26    |Visitor-367467|
|page_view  |Page-5 |        |Aveeno,Clear Complexion Foaming Cleanser       |Other   |2018-03-26    |Visitor-578136|
|page_view  |Page-13|        |Expo,Dry Erase Markers    

root
 |-- action: string (nullable = true)
 |-- page: string (nullable = true)
 |-- prevPage: string (nullable = true)
 |-- product: string (nullable = true)
 |-- referrer: string (nullable = true)
 |-- timestamp_hour: string (nullable = true)
 |-- visitor: string (nullable = true)

None
+-----------+-------+--------+-------------------------------------------+--------+--------------+--------------+
|action     |page   |prevPage|product                                    |referrer|timestamp_hour|visitor       |
+-----------+-------+--------+-------------------------------------------+--------+--------------+--------------+
|purchase   |Page-9 |        |Nintendo,Super Mario Galaxy                |Direct  |1522067747877 |Visitor-619066|
|add_to_cart|Page-7 |        |Trident,"White Gum, Peppermint"            |Other   |1522067748357 |Visitor-917067|
|page_view  |Page-8 |        |Kraft,Cool Whip                            |Other   |1522067748837 |Visitor-907825|
|page_view  |Page-6 |      