In [62]:
# Widen the notebook container to 95% of the browser window so wide
# DataFrame.show() tables are easier to read.
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [63]:
# Echo the SparkContext as a sanity check. `sc` is not created in the visible
# cells -- presumably it is injected by the PySpark kernel (TODO confirm).
sc

In [64]:
from threading import Thread

class StreamingThread(Thread):
    """Thread that drives a streaming context so the notebook cell returns.

    The thread starts the wrapped context and then blocks on
    ``awaitTermination()``; ``stop()`` is meant to be called from another
    thread (e.g. a later notebook cell) to end the stream.

    Parameters
    ----------
    ssc
        Streaming context to run. It must provide ``start()``,
        ``awaitTermination()`` and
        ``stop(stopSparkContext=..., stopGraceFully=...)``.
    """

    def __init__(self, ssc):
        super().__init__()
        self.ssc = ssc

    def run(self):
        """Start the wrapped context and block until it terminates."""
        # Use the context handed to the constructor, not whatever global
        # happens to be called `ssc` at the time the thread runs.
        self.ssc.start()
        self.ssc.awaitTermination()

    def stop(self):
        """Stop the streaming context but leave the SparkContext running."""
        print('----- Stopping... this may take a few seconds -----')
        # `stopGraceFully` (capital F) is the keyword name this call uses.
        self.ssc.stop(stopSparkContext=False, stopGraceFully=True)

In [65]:
# Echo the SparkSession as a sanity check. `spark` is not created in the
# visible cells -- presumably it is injected by the PySpark kernel (TODO confirm).
spark

In [66]:
from pyspark.streaming import StreamingContext
from pyspark.sql import Row
from pyspark.sql.functions import udf, struct
from pyspark.sql.types import IntegerType
from pyspark.ml import PipelineModel
from pyspark.sql import functions as fn

In [67]:
# Lazy-load flag consulted by process(): it stays False until the pipeline
# model has been loaded once.
models_loaded = False

# Location handed to PipelineModel.load() inside process().
path = "lrm_bin.model"

def predict(df):
    """Keyword-based baseline sentiment classifier for one review row.

    Label convention (the one this notebook prints when streaming starts):
    0 = bad review, 1 = good review.

    Parameters
    ----------
    df
        Row-like object exposing a ``review_text`` attribute (a string,
        or ``None`` when the text is missing).

    Returns
    -------
    int
        0 if the text contains any negative cue word as a substring,
        otherwise 1. Missing text is treated as containing no cue.
    """
    # Replace this with something smarter
    negative_cues = ('not', 'bad', 'terrible', 'zero', 'one')

    # A missing review text would make the `in` test raise TypeError.
    text = df.review_text or ''

    # Substring match, so e.g. 'one' also matches inside 'someone'.
    if any(cue in text for cue in negative_cues):
        return 0
    return 1

# Spark UDF wrapper around the keyword baseline; it returns an integer column.
# Not referenced by any live code in the visible cells.
predict_udf = udf(f=predict, returnType=IntegerType())

def process(time, rdd):
    """Score one micro-batch of streamed reviews and print the predictions.

    Registered as the ``foreachRDD`` callback of the review stream.

    Parameters
    ----------
    time
        Batch timestamp; only used in the printed header line.
    rdd
        RDD holding the batch. It is parsed with ``spark.read.json``, so
        each element is presumably one JSON-encoded review (TODO confirm)
        providing at least ``review_title``, ``review_text``,
        ``review_score`` and ``book_title``.

    Returns
    -------
    None
        Results are printed with ``DataFrame.show()``; an empty batch
        returns immediately without printing anything.
    """
    global models_loaded, my_model

    if rdd.isEmpty():
        return
    print("========= %s =========" % str(time))

    # Convert to data frame and build the single text column that is fed
    # to the model: "<title> <text>".
    # NOTE(review): concat() yields null when either input column is null --
    # confirm the pipeline copes with that, or that the stream never sends it.
    df_original = spark.read.json(rdd)
    df_with_concat = df_original.withColumn(
        'review_concat',
        fn.concat(fn.col('review_title'), fn.lit(' '), fn.col('review_text')),
    )

    # Load the model only on the first non-empty batch; later calls reuse the
    # instance cached in the module-level `my_model`.
    if not models_loaded:
        my_model = PipelineModel.load(path)
        models_loaded = True
        print('****** Model loaded ******')

    # Predict using the model and show only the columns of interest.
    df_result = my_model.transform(df_with_concat)
    df_result_light = df_result.select('book_title', 'review_concat', 'review_score', 'prediction')
    df_result_light.show()

In [68]:
# Streaming context on top of the existing SparkContext; incoming data is
# grouped into batches using a batch duration of 10.
ssc = StreamingContext(sparkContext=sc, batchDuration=10)

In [69]:
# Open a text stream from the remote socket and hand every batch RDD to
# process() for scoring.
lines = ssc.socketTextStream(hostname="seppe.net", port=7778)
lines.foreachRDD(process)

In [70]:
# Run the streaming context on a background thread so this cell returns
# immediately, then print the legend for the `prediction` column.
ssc_t = StreamingThread(ssc)
ssc_t.start()
print(
    '****** Starting prediction ******',
    '  - Prediction = 0 means bad review (score 1 or 2)',
    '  - Prediction = 1 means good review (score 3 to 5) \n',
    sep='\n',
)

****** Starting prediction ******
  - Prediction = 0 means bad review (score 1 or 2)
  - Prediction = 1 means good review (score 3 to 5) 

****** Model loaded ******
+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|The Great Alone: ...|Well written fami...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|Sunset Beach: A N...|Sunset Beach Grea...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+---------------

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|The Great Alone: ...|Well written fami...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|Sunset Beach: A N...|Sunset Beach Grea...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|Sunset Beach: A N...|Sunset Beach Grea...|           4|       1.0|
+--------------------+--------------------+---

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|The Great Alone: ...|I recommend this ...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|The 18th Abductio...|2 thi Very dark a...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|Full Count: The E...|Perfect! Bought a...|           5|       1.0|
+--------------------+--------------------+---

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|Spying on the Sou...|Tony Horwitz cont...|           5|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|The Great Alone: ...|I recommend this ...|           4|       1.0|
+--------------------+--------------------+------------+----------+

+--------------------+--------------------+------------+----------+
|          book_title|       review_concat|review_score|prediction|
+--------------------+--------------------+------------+----------+
|The 18th Abductio...|2 thi Very dark a...|           4|       1.0|
+--------------------+--------------------+---

In [61]:
# Ask the background thread's streaming context to stop; StreamingThread.stop()
# prints a notice and passes stopSparkContext=False, so `sc` is not stopped.
ssc_t.stop()

----- Stopping... this may take a few seconds -----
