# Kafka Consumer

In [1]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.0.2,org.postgresql:postgresql:42.1.1 pyspark-shell'

### Import dependencies


In [2]:
#Stream processing
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
import json

In [3]:
#Text processing
import preprocessor as p
import re
import string

import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize

### Text cleaning

In [4]:
stop_words = set(stopwords.words("english")) #create a set of stopwords
stop_words = stop_words.union(set(("im", "thats","rt"))) #add these stopwords
stop_words = stop_words - set("not") #remove not from the stopwords

p.set_options(p.OPT.URL, p.OPT.MENTION, p.OPT.EMOJI, p.OPT.SMILEY) #params to remove from the text

def preprocess(text):
    
    #remove url, mentions, smiley and emojis
    text = p.clean(text)
    
    #to lowercase
    text = text.lower()
    
    #remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    
    #removing stopwords and stem each word
    tokens = word_tokenize(text)
    
    #is_noun = lambda pos: pos[:2] == 'NN'
    text = [i for i in tokens if not i in stop_words]
    #text = [word for (word, pos) in nltk.pos_tag(tokens) if is_noun(pos)] 
    
    return ' '.join(text)

### Create Spark context and SQL context

In [5]:
sc = SparkContext(appName="StreamProcessorPyspark").getOrCreate()
sqlContext = SQLContext(sc)
sc.setLogLevel("WARN")

### Create Streaming Context

In [6]:
ssc = StreamingContext(sc, 10)
ssc.checkpoint('ssc_checkpoint')

### Connect to Kafka


In [7]:
ip = 'localhost' #35.228.250.247
kafkaParams = {"metadata.broker.list": ip+':9092', "auto.offset.reset": 'largest'}
myStream = KafkaUtils.createDirectStream(ssc, ['DIC'], kafkaParams)

### Process DStream

In [8]:
tweets = myStream.map(lambda item: json.loads(item[1]))
tweets = tweets.map(lambda x: ((x['tag'], x['date']), preprocess(x['tweet']).split()))

pairs = tweets.flatMapValues(lambda x: x)
words = pairs.map(lambda x: ((x[0][0], x[0][1], x[1]), 1))                     

In [9]:
def updateFunction(newValues, runningCount):
    if runningCount is None:
        runningCount = 0
        
    return sum(newValues, runningCount)

counts = words.updateStateByKey(updateFunction)

### Store results into DB

In [10]:
url = "jdbc:postgresql://ec2-54-217-206-65.eu-west-1.compute.amazonaws.com:5432/d9bf9qompakvh6?ssl=true&sslfactory=org.postgresql.ssl.NonValidatingFactory"

properties = {
    "driver": "org.postgresql.Driver",
    "user": "cuapumoqmbkafk",
    "password": "a510ef81c98a872ea8e47b58e7e044fb5f204f56b7019a581aa3b4f9223498f8"
}

def sendPartition(iter):
    for record in iter:
        df = sqlContext.createDataFrame(record)
        df.write.jdbc(url=url, table="word", properties=properties)

counts.foreachRDD(lambda rdd: rdd.foreachPartition(sendPartition))

df = sqlContext.read.jdbc(url=url, table="word", properties=properties) 
df.show()

+----------+---------+-------------+----------+
|      date|frequency|         word|       tag|
+----------+---------+-------------+----------+
|2019-10-16|       14|        happy|     apple|
|2019-10-16|      154|san francisco|earthquake|
|2019-10-15|      130|       scared|earthquake|
|2019-10-16|      100|       damage|earthquake|
|2019-10-16|       40|       deaths|earthquake|
+----------+---------+-------------+----------+



In [11]:
ssc.start()
ssc.awaitTermination(timeout=10)

Traceback (most recent call last):
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/serializers.py", line 590, in dumps
    return cloudpickle.dumps(obj, 2)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 863, in dumps
    cp.dump(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 260, in dump
    return Pickler.dump(self, obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 437, in dump
    self.save(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 771, in save_tuple
    save(element)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/

Py4JJavaError: An error occurred while calling o31.start.
: java.io.IOException: org.apache.spark.SparkException: An exception was raised by Python:
Traceback (most recent call last):
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/serializers.py", line 590, in dumps
    return cloudpickle.dumps(obj, 2)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 863, in dumps
    cp.dump(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 260, in dump
    return Pickler.dump(self, obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 437, in dump
    self.save(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 771, in save_tuple
    save(element)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 400, in save_function
    self.save_function_tuple(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 549, in save_function_tuple
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 816, in save_list
    self._batch_appends(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 843, in _batch_appends
    save(tmp[0])
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 400, in save_function
    self.save_function_tuple(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 549, in save_function_tuple
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 887, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 400, in save_function
    self.save_function_tuple(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 549, in save_function_tuple
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 662, in save_reduce
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 524, in save
    rv = reduce(self.proto)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/context.py", line 339, in __getnewargs__
    "It appears that you are attempting to reference SparkContext from a broadcast "
Exception: It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transformation. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/streaming/util.py", line 115, in dumps
    func.func, func.rdd_wrap_func, func.deserializers)))
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/serializers.py", line 600, in dumps
    raise pickle.PicklingError(msg)
_pickle.PicklingError: Could not serialize object: Exception: It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transformation. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063.

	at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1333)
	at org.apache.spark.streaming.api.python.TransformFunction.writeObject(PythonDStream.scala:100)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:1140)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1496)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
	at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1378)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
	at java.io.ObjectOutputStream.defaultWriteObject(ObjectOutputStream.java:441)
	at org.apache.spark.streaming.DStreamGraph$$anonfun$writeObject$1.apply$mcV$sp(DStreamGraph.scala:187)
	at org.apache.spark.streaming.DStreamGraph$$anonfun$writeObject$1.apply(DStreamGraph.scala:182)
	at org.apache.spark.streaming.DStreamGraph$$anonfun$writeObject$1.apply(DStreamGraph.scala:182)
	at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1326)
	at org.apache.spark.streaming.DStreamGraph.writeObject(DStreamGraph.scala:182)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at java.io.ObjectStreamClass.invokeWriteObject(ObjectStreamClass.java:1140)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1496)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
	at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
	at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
	at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
	at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
	at org.apache.spark.streaming.Checkpoint$$anonfun$serialize$1.apply$mcV$sp(Checkpoint.scala:152)
	at org.apache.spark.streaming.Checkpoint$$anonfun$serialize$1.apply(Checkpoint.scala:152)
	at org.apache.spark.streaming.Checkpoint$$anonfun$serialize$1.apply(Checkpoint.scala:152)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.streaming.Checkpoint$.serialize(Checkpoint.scala:153)
	at org.apache.spark.streaming.StreamingContext.validate(StreamingContext.scala:525)
	at org.apache.spark.streaming.StreamingContext.liftedTree1$1(StreamingContext.scala:573)
	at org.apache.spark.streaming.StreamingContext.start(StreamingContext.scala:572)
	at org.apache.spark.streaming.api.java.JavaStreamingContext.start(JavaStreamingContext.scala:556)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: An exception was raised by Python:
Traceback (most recent call last):
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/serializers.py", line 590, in dumps
    return cloudpickle.dumps(obj, 2)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 863, in dumps
    cp.dump(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 260, in dump
    return Pickler.dump(self, obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 437, in dump
    self.save(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 771, in save_tuple
    save(element)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 400, in save_function
    self.save_function_tuple(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 549, in save_function_tuple
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 816, in save_list
    self._batch_appends(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 843, in _batch_appends
    save(tmp[0])
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 400, in save_function
    self.save_function_tuple(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 549, in save_function_tuple
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 887, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 400, in save_function
    self.save_function_tuple(obj)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/cloudpickle.py", line 549, in save_function_tuple
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 662, in save_reduce
    save(state)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/pickle.py", line 524, in save
    rv = reduce(self.proto)
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/context.py", line 339, in __getnewargs__
    "It appears that you are attempting to reference SparkContext from a broadcast "
Exception: It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transformation. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/streaming/util.py", line 115, in dumps
    func.func, func.rdd_wrap_func, func.deserializers)))
  File "/Users/vittoriodenti/anaconda3/lib/python3.7/site-packages/pyspark/serializers.py", line 600, in dumps
    raise pickle.PicklingError(msg)
_pickle.PicklingError: Could not serialize object: Exception: It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transformation. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063.

	at org.apache.spark.streaming.api.python.PythonTransformFunctionSerializer$.serialize(PythonDStream.scala:144)
	at org.apache.spark.streaming.api.python.TransformFunction$$anonfun$writeObject$1.apply$mcV$sp(PythonDStream.scala:101)
	at org.apache.spark.streaming.api.python.TransformFunction$$anonfun$writeObject$1.apply(PythonDStream.scala:100)
	at org.apache.spark.streaming.api.python.TransformFunction$$anonfun$writeObject$1.apply(PythonDStream.scala:100)
	at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1326)
	... 63 more
