In [1]:
# Demand Prediction Streaming Data

# Step 1. Configure and Deploy flume config file -> flume_streaming.conf
# Step 2. Start Flume Agent -> flume_source_data_ingest.txt
# Step 3- Send some sample JSON data to flume -> flume_source_data_ingest.txt
# Step 4 - Spark streaming to pull data from spark flume sink using receivers (Run this notebook)
# Step 5 - Spark streaming application processes and persist the streaming result to RDBMS (Run this notebook)

import os
from pyspark.sql import SparkSession, SQLContext
from pyspark import SparkContext, SparkConf
from pyspark.sql.functions import explode
from pyspark.sql.functions import countDistinct, avg
from pyspark.sql.functions import dayofmonth,dayofyear,year,month,hour,weekofyear,date_format
from pyspark.sql.functions import col as func_col
from pyspark.sql.functions import lit
from pyspark.sql.functions import *
from pyspark.ml import Pipeline
from pyspark.ml import PipelineModel
from pyspark.ml.regression import GBTRegressor
from pyspark.ml.feature import VectorIndexer
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.streaming import StreamingContext
from pyspark.streaming.flume import FlumeUtils
import json
from pyspark.sql import Row


In [2]:
user_id = 'Edureka_749763'
app_name = '{0} : Spark SQL'.format(user_id)

In [3]:
#Configuration of the Spark Session
conf = SparkConf()  # create the configuration
conf.set('spark.driver.extraClassPath', "/usr/share/cmf/common_jars/mysql-connector-java-5.1.15.jar")  
conf.set('spark.executor.extraClassPath', "/usr/share/cmf/common_jars/mysql-connector-java-5.1.15.jar")
#conf.set('spark.driver.extraClassPath', "/usr/share/java/mysql-connector-java-5.1.42-bin.jar")  
#conf.set('spark.executor.extraClassPath', "/usr/share/java/mysql-connector-java-5.1.42-bin.jar")
#os.environ['SPARK_CLASSPATH'] = "/usr/share/java/mysql-connector-java-5.1.42-bin.jar" 
#Spark Session object
spark = SparkSession.builder.config(conf=conf).appName(app_name).getOrCreate()

In [4]:
stages = []
assemblerInputs = ["season_1","season_2","season_3", "temp", "atemp", "humidity", "windspeed"]
Vectassembler = VectorAssembler(inputCols=assemblerInputs, outputCol='features')
stages += [Vectassembler]

pipeline = Pipeline(stages = stages)
reloaded_model = PipelineModel.load("/user/edureka_749763/models_gradient_boost_regressor")
seasons_code={1 : [0.0,0.0,0.0] , 2 : [0.0,1.0,0.0], 3:[0.0,0.0,1.0], 4:[1.0,0.0,0.0]}
streamSchema = StructType([
    StructField("season_1", DoubleType(),nullable=True),
    StructField("season_2", DoubleType(),nullable=True),
    StructField("season_3", DoubleType(),nullable=True),
    StructField("weather", IntegerType(),nullable=True),                    
    StructField("temp", DoubleType(),nullable=True),
    StructField("atemp", DoubleType(),nullable=True),
    StructField("humidity", DoubleType(),nullable=True),
    StructField("windspeed", DoubleType(),nullable=True)
])

In [None]:
def process(rdd): 
   
    df = spark.createDataFrame(rdd, schema=streamSchema)
    df.show()
    cols = df.columns
    pipelineModel = pipeline.fit(df)
    df = pipelineModel.transform(df)
    #dataframe features selection 
    selectedCols = ['features']+cols
    df = df.select(selectedCols)
    # Make predictions.
    temp_df=reloaded_model.transform(df)        
    temp_df.select("features","prediction").show()        
    temp_df=temp_df.drop("features","indexedFeatures")        
    #Storing the Data in the table created for storing the results of the predictions into the RDBMS Table       
    (temp_df.write.mode('append').format('jdbc').options(
        url='jdbc:mysql://dbserver.edu.cloudlab.com/labuser_database',
        driver='com.mysql.jdbc.Driver',
        dbtable='bike_streaming_prediction_result',
        user='edu_labuser',
        password='edureka').save()
    )        
    #reading of the Data again from the table and show the sample of results
    dataframe_mysql = spark.read.format("jdbc").option("url", 'jdbc:mysql://dbserver.edu.cloudlab.com/labuser_database').option("driver", "com.mysql.jdbc.Driver").option("dbtable", "bike_streaming_prediction_result").option("user", 'edu_labuser').option("password", "edureka").load() 
    dataframe_mysql.show(5)    


In [None]:
ssc= StreamingContext(spark.sparkContext, 20)
flumeStream = FlumeUtils.createPollingStream(ssc, [('ip-20-0-41-62.ec2.internal' , 9090)])
flumeStream.count().pprint()
lines = flumeStream.map(lambda x: x[1])
records_dict=lines.map(lambda x: json.loads(x))
records_dict.map(lambda y: y)
rows_rdd=records_dict.map(lambda res: Row(seasons_code[res['season']][0],seasons_code[res['season']][1],seasons_code[res['season']][2],res['weather'],res['temp'],res['atemp'],res['humidity'],res['windspeed'] ))
rows_rdd.foreachRDD(process)
ssc.start() # Start the computation
ssc.awaitTermination() # Wait for the computation to terminate

-------------------------------------------
Time: 2020-07-31 14:17:20
-------------------------------------------
2

+--------+--------+--------+-------+-----+------+--------+---------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|
+--------+--------+--------+-------+-----+------+--------+---------+
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|
+--------+--------+--------+-------+-----+------+--------+---------+

+--------------------+------------------+
|            features|        prediction|
+--------------------+------------------+
|[0.0,1.0,0.0,29.8...|157.51984646308424|
|[0.0,1.0,0.0,29.8...|157.51984646308424|
+--------------------+------------------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+---

-------------------------------------------
Time: 2020-07-31 14:19:00
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:21:20
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:23:40
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:26:00
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:28:20
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:30:40
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:33:00
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:35:20
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

-------------------------------------------
Time: 2020-07-31 14:37:40
-------------------------------------------

+--------+--------+--------+-------+----+-----+--------+---------+
|season_1|season_2|season_3|weather|temp|atemp|humidity|windspeed|
+--------+--------+--------+-------+----+-----+--------+---------+
+--------+--------+--------+-------+----+-----+--------+---------+

+--------+----------+
|features|prediction|
+--------+----------+
+--------+----------+

+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|season_1|season_2|season_3|weather| temp| atemp|humidity|windspeed|        prediction|
+--------+--------+--------+-------+-----+------+--------+---------+------------------+
|     0.0|     0.0|     0.0|      1|19.84|14.395|    81.9|     16.9|138.55820247208138|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|
|     0.0|     1.0|     0.0|      1|29.84|19.395|    89.9|     19.9|157.51984646308424|

In [None]:
## Done!