Skip to content

Commit

Permalink
WIP
Browse files (browse the repository at this point in the history)
  • Loading branch information
giwa committed Sep 20, 2014
1 parent 1df77f5 commit 9ad6855
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 4 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -350,6 +350,8 @@ private[spark] object PythonRDD extends Logging {
} catch {
case eof: EOFException => {}
}
println("RDDDD ==================")
println(objs)
JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism))
}

Expand Down
10 changes: 7 additions & 3 deletions examples/src/main/python/streaming/test_oprations.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,11 +9,15 @@
conf = SparkConf()
conf.setAppName("PythonStreamingNetworkWordCount")
ssc = StreamingContext(conf=conf, duration=Seconds(1))
ssc.checkpoint("/tmp/spark_ckp")

test_input = ssc._testInputStream([1,1,1,1])
mapped = test_input.map(lambda x: (x, 1))
mapped.pyprint()
test_input = ssc._testInputStream([[1],[1],[1]])
# ssc.checkpoint("/tmp/spark_ckp")
fm_test = test_input.flatMap(lambda x: x.split(" "))
mapped_test = fm_test.map(lambda x: (x, 1))


mapped_test.print_()
ssc.start()
# ssc.awaitTermination()
# ssc.stop()
6 changes: 5 additions & 1 deletion python/pyspark/streaming/context.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,7 +146,10 @@ def _testInputStream(self, test_input, numSlices=None):
# Calling the Java parallelize() method with an ArrayList is too slow,
# because it sends O(n) Py4J commands. As an alternative, serialized
# objects are written to a file and loaded through textFile().
tempFile = NamedTemporaryFile(delete=False, dir=self._sc._temp_dir)

#tempFile = NamedTemporaryFile(delete=False, dir=self._sc._temp_dir)
tempFile = open("/tmp/spark_rdd", "wb")

# Make sure we distribute data evenly if it's smaller than self.batchSize
if "__len__" not in dir(test_input):
c = list(test_input) # Make it a list so we can compute its length
Expand All @@ -157,6 +160,7 @@ def _testInputStream(self, test_input, numSlices=None):
else:
serializer = self._sc._unbatched_serializer
serializer.dump_stream(test_input, tempFile)
tempFile.flush()
tempFile.close()
print tempFile.name
jinput_stream = self._jvm.PythonTestInputStream(self._jssc,
Expand Down

0 comments on commit 9ad6855

Please sign in to comment.