diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index dc68b1fbda8bb..b66c3ba4d5fb0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -57,7 +57,6 @@ object PythonRunner { val builder = new ProcessBuilder(Seq(pythonExec, "-u", formattedPythonFile) ++ otherArgs) val env = builder.environment() env.put("PYTHONPATH", pythonPath) - env.put("PYSPARK_PYTHON", pythonExec) env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort) builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize val process = builder.start() diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index f3c6d231ab777..6650a4cd10be4 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -118,4 +118,5 @@ def run(self): java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext") java_import(gateway.jvm, "org.apache.spark.sql.hive.TestHiveContext") java_import(gateway.jvm, "scala.Tuple2") + return gateway diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index d152485a3a17d..f75d7c7f1b259 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -414,7 +414,7 @@ def get_output(rdd, time): # TODO: implement countByWindow # TODO: implement reduceByWindow -# transform Operation +# Transform Operation # TODO: implement transform # TODO: implement transformWith # Following operation has dependency with transform @@ -423,7 +423,7 @@ def get_output(rdd, time): # TODO: implement cogroup # TODO: implement join # TODO: implement leftOuterJoin -# TODO: implemtnt rightOuterJoin +# TODO: implement rightOuterJoin class PipelinedDStream(DStream):