Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
giwa committed Sep 20, 2014
1 parent 29c2bc5 commit fe648e3
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
24 changes: 24 additions & 0 deletions examples/src/main/python/streaming/test_oprations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys
from operator import add

from pyspark.conf import SparkConf
from pyspark.streaming.context import StreamingContext
from pyspark.streaming.duration import *

if __name__ == "__main__":
    # Streaming word-count smoke test: read text lines from a socket, split
    # them into words and print the per-word counts.
    #
    # Expects exactly two command-line arguments: <hostname> <port>.
    if len(sys.argv) != 3:
        print >> sys.stderr, "Usage: wordcount <hostname> <port>"
        # Use sys.exit() instead of the bare exit() helper: exit() is injected
        # by the site module for interactive sessions and is not guaranteed to
        # exist (e.g. under `python -S` or in embedded interpreters).
        sys.exit(-1)

    conf = SparkConf()
    conf.setAppName("PythonStreamingNetworkWordCount")
    # presumably Seconds(1) is the batch interval -- TODO confirm against
    # StreamingContext's signature.
    ssc = StreamingContext(conf=conf, duration=Seconds(1))

    # The port arrives as a string on the command line, so convert it to int.
    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
    # Split on a single space; consecutive spaces therefore yield empty words.
    words = lines.flatMap(lambda line: line.split(" "))
    # Pair every word with 1 so that the values can be combined per key.
    mapped_words = words.map(lambda word: (word, 1))
    count = mapped_words.reduceByKey(add)

    count.pyprint()
    ssc.start()
    # NOTE(review): stop() is called right after start() while the wait below
    # is disabled, so the context probably shuts down before much (if any)
    # input is processed. This looks like a deliberate work-in-progress
    # state -- confirm before re-enabling.
    # ssc.awaitTermination()
    ssc.stop()
1 change: 0 additions & 1 deletion python/pyspark/streaming/dstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def _mergeCombiners(iterator):
combiners[k] = v
else:
combiners[k] = mergeCombiners(combiners[k], v)
return combiners.iteritems()

return shuffled._mapPartitions(_mergeCombiners)

Expand Down

0 comments on commit fe648e3

Please sign in to comment.