In [4]:
import threading

# Helper thread that keeps the Spark StreamingContext from blocking Jupyter
        
class StreamingThread(threading.Thread):
    """Thread wrapper that starts a streaming context and waits on it.

    The blocking ``awaitTermination`` call happens inside ``run``, i.e. on
    this thread rather than on the thread that created the object.
    """

    def __init__(self, ssc):
        """Store the streaming context that this thread will drive.

        Args:
            ssc: object exposing ``start()``, ``awaitTermination()`` and
                ``stop(...)``; kept on ``self.ssc``.
        """
        threading.Thread.__init__(self)
        self.ssc = ssc

    def run(self):
        """Start the context, then block until it reports termination."""
        context = self.ssc
        context.start()
        context.awaitTermination()

    def stop(self):
        """Print a notice and ask the streaming context to stop.

        The context is stopped with ``stopSparkContext=False`` and
        ``stopGraceFully=True``.
        """
        print('----- Stopping... this may take a few seconds -----')
        # `stopGraceFully` (capital F) is the keyword name used by PySpark's
        # StreamingContext.stop; do not "correct" the spelling.
        self.ssc.stop(stopGraceFully=True, stopSparkContext=False)

In [5]:
# Display the SparkContext. NOTE(review): `sc` is not created in any visible
# cell -- presumably injected by the PySpark kernel; confirm on a fresh kernel.
sc

In [6]:
# Display the `spark` session object that `process` later uses for
# `spark.read.json`. NOTE(review): not created in any visible cell --
# presumably injected by the PySpark kernel; confirm on a fresh kernel.
spark

In [38]:
import random
from pyspark.streaming import StreamingContext
from pyspark.sql import Row
from pyspark.sql.functions import udf, struct, array, col, lit
from pyspark.sql.types import StringType
from pyspark.ml.pipeline import PipelineModel

In [45]:
# Module-level cache: the fitted pipeline is loaded from disk on the first
# non-empty batch and reused for every later batch.
MODEL_PATH = "gb_Model"
models_loaded = False
my_model = None


def process(time, rdd):
    """Score one batch of the stream and print the predictions.

    Intended as the callback passed to ``foreachRDD``.

    Args:
        time: batch identifier; only used (via ``str``) in the printed header.
        rdd: the batch to score. It is read with ``spark.read.json``, so its
            elements are presumably JSON strings (confirm against the source).
            The resulting frame must provide the ``title`` and ``frontpage``
            columns that are selected below.

    Returns:
        None. Empty batches are skipped silently; otherwise a header and a
        table of ``title``, ``frontpage`` and ``prediction`` are printed.
    """
    global models_loaded, my_model

    if rdd.isEmpty():
        return

    print("========= %s =========" % str(time))

    # Convert to data frame
    df = spark.read.json(rdd)
    # Optional debugging aid -- show the raw input before scoring:
    # df.select(["title", "frontpage"]).show()

    # Load the saved pipeline lazily, once per kernel session.
    if not models_loaded:
        my_model = PipelineModel.load(MODEL_PATH)
        models_loaded = True

    # Predict with the cached model and display the columns of interest.
    df_result = my_model.transform(df)
    df_result.select(["title", "frontpage", "prediction"]).show()

In [46]:
# Batch interval handed to the StreamingContext (seconds, per the PySpark API):
# `process` is invoked once per interval with whatever arrived in that window.
BATCH_INTERVAL_SECONDS = 10
ssc = StreamingContext(sc, BATCH_INTERVAL_SECONDS)

In [47]:
# Endpoint of the text stream this notebook consumes.
STREAM_HOST = "seppe.net"
STREAM_PORT = 7778

# Open the socket stream and register `process` as the per-batch callback.
lines = ssc.socketTextStream(STREAM_HOST, STREAM_PORT)
lines.foreachRDD(process)

In [48]:
# Drive the streaming context from a helper thread: StreamingThread.run calls
# ssc.start() and then the blocking ssc.awaitTermination() off the main thread.
# Tables printed by `process` appear below while the stream is running.
ssc_t = StreamingThread(ssc)
ssc_t.start()

+--------------------+---------+
|               title|frontpage|
+--------------------+---------+
|What makes OpenBS...|    false|
+--------------------+---------+

+--------------------+---------+----------+
|               title|frontpage|prediction|
+--------------------+---------+----------+
|What makes OpenBS...|    false|       0.0|
+--------------------+---------+----------+

+--------------------+---------+
|               title|frontpage|
+--------------------+---------+
|What consumers sh...|    false|
|First Time Ever L...|    false|
+--------------------+---------+

+--------------------+---------+----------+
|               title|frontpage|prediction|
+--------------------+---------+----------+
|What consumers sh...|    false|       1.0|
|First Time Ever L...|    false|       0.0|
+--------------------+---------+----------+

+--------------------+---------+
|               title|frontpage|
+--------------------+---------+
|Show HN: A cross-...|    false|
|Electromigration

In [49]:
# Request shutdown of the streaming context (stopGraceFully=True, SparkContext
# kept alive). Tables printed after the "Stopping..." notice presumably come
# from batches still being processed during the graceful stop.
ssc_t.stop()

----- Stopping... this may take a few seconds -----
+--------------------+---------+
|               title|frontpage|
+--------------------+---------+
|Show HN: Thru – C...|    false|
|American Airlines...|    false|
|Amateur birder in...|     true|
+--------------------+---------+

+--------------------+---------+----------+
|               title|frontpage|prediction|
+--------------------+---------+----------+
|Show HN: Thru – C...|    false|       0.0|
|American Airlines...|    false|       0.0|
|Amateur birder in...|     true|       1.0|
+--------------------+---------+----------+

+--------------------+---------+
|               title|frontpage|
+--------------------+---------+
|Vaping linked wit...|     true|
+--------------------+---------+

+--------------------+---------+----------+
|               title|frontpage|prediction|
+--------------------+---------+----------+
|Vaping linked wit...|     true|       1.0|
+--------------------+---------+----------+

