Skip to content

Commit

Permalink
[MINOR][BRANCH-2.4] Avoid hardcoded py4j-0.10.7-src.zip in Scala
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

This PR targets to deduplicate hardcoded `py4j-0.10.7-src.zip` in order to make py4j upgrade easier.

## How was this patch tested?

N/A

Closes #24772 from HyukjinKwon/backport-minor-py4j.

Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
(cherry picked from commit 6715135)
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
HyukjinKwon authored and dongjoon-hyun committed Jun 3, 2019
1 parent c4ed161 commit 30735db
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
Expand Up @@ -27,12 +27,15 @@ import org.apache.spark.SparkContext
import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}

private[spark] object PythonUtils {
val PY4J_ZIP_NAME = "py4j-0.10.7-src.zip"

/** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */
def sparkPythonPath: String = {
val pythonPath = new ArrayBuffer[String]
for (sparkHome <- sys.env.get("SPARK_HOME")) {
pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.7-src.zip").mkString(File.separator)
pythonPath +=
Seq(sparkHome, "python", "lib", PY4J_ZIP_NAME).mkString(File.separator)
}
pythonPath ++= SparkContext.jarOfObject(this)
pythonPath.mkString(File.pathSeparator)
Expand Down
Expand Up @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException
import org.apache.hadoop.yarn.util.Records

import org.apache.spark.{SecurityManager, SparkConf, SparkException}
import org.apache.spark.api.python.PythonUtils
import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager
Expand Down Expand Up @@ -1182,7 +1183,7 @@ private[spark] class Client(
val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
require(pyArchivesFile.exists(),
s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.")
val py4jFile = new File(pyLibPath, "py4j-0.10.7-src.zip")
val py4jFile = new File(pyLibPath, PythonUtils.PY4J_ZIP_NAME)
require(py4jFile.exists(),
s"$py4jFile not found; cannot run pyspark application in YARN mode.")
Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
Expand Down
Expand Up @@ -33,6 +33,7 @@ import org.scalatest.Matchers
import org.scalatest.concurrent.Eventually._

import org.apache.spark._
import org.apache.spark.api.python.PythonUtils
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
Expand Down Expand Up @@ -257,7 +258,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
// needed locations.
val sparkHome = sys.props("spark.test.home")
val pythonPath = Seq(
s"$sparkHome/python/lib/py4j-0.10.7-src.zip",
s"$sparkHome/python/lib/${PythonUtils.PY4J_ZIP_NAME}",
s"$sparkHome/python")
val extraEnvVars = Map(
"PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
Expand Down

0 comments on commit 30735db

Please sign in to comment.