From b1755a00a4bfe22fcd438eacb085efc45efa7c14 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 29 Aug 2014 15:14:15 -0700
Subject: [PATCH 1/5] Format jar paths properly before adding them to the
 classpath

In addition to this, we must also make sure all code downstream is updated
to reflect this change. In particular, when the executors fetch files from
the driver, the URLs of the resources must be properly formatted.
---
 .../scala/org/apache/spark/util/Utils.scala |  9 ++++++++-
 .../org/apache/spark/repl/SparkILoop.scala  | 17 +++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 86f646d2af181..b46c0ae091269 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -325,7 +325,14 @@ private[spark] object Utils extends Logging {
     val targetFile = new File(targetDir, filename)
     val uri = new URI(url)
     val fileOverwrite = conf.getBoolean("spark.files.overwrite", false)
-    uri.getScheme match {
+    // In Windows, the URL may be the raw path starting with a root drive (e.g. C:/)
+    // We need to format this correctly so that the drive is not interpreted as a URI scheme
+    val formattedURI =
+      uri.getScheme match {
+        case windowsDrive(d) if isWindows => new URI("file:/" + uri)
+        case _ => uri
+      }
+    formattedURI.getScheme match {
       case "http" | "https" | "ftp" =>
         logInfo("Fetching " + url + " to " + tempFile)
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 53df599cf8121..0c49f681263ee 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -18,6 +18,7 @@ import scala.tools.nsc.interpreter._
 import scala.tools.nsc.interpreter.{ Results => IR }
 import Predef.{ println => _, _ }
 import java.io.{ BufferedReader, FileReader }
+import java.net.URI
 import java.util.concurrent.locks.ReentrantLock
 import scala.sys.process.Process
 import scala.tools.nsc.interpreter.session._
@@ -175,7 +176,7 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
     }
   }
 
-  class SparkILoopInterpreter extends SparkIMain(settings, out) {
+  class SparkILoopInterpreter extends SparkIMain(settings, out) with Logging {
     outer =>
 
     override lazy val formatting = new Formatting {
@@ -194,6 +195,9 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
       settings.classpath.value)((l, r) => ClassPath.join(l, r))
     this.settings.classpath.value = totalClassPath
 
+    logError("HELLO")
+    logError("MY CLASSPATH = " + settings.classpath.value)
+
     intp = new SparkILoopInterpreter
   }
 
@@ -965,11 +969,9 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
 
   def createSparkContext(): SparkContext = {
     val execUri = System.getenv("SPARK_EXECUTOR_URI")
-    val jars = SparkILoop.getAddedJars
     val conf = new SparkConf()
       .setMaster(getMaster())
       .setAppName("Spark shell")
-      .setJars(jars)
       .set("spark.repl.class.uri", intp.classServer.uri)
     if (execUri != null) {
       conf.set("spark.executor.uri", execUri)
@@ -1018,7 +1020,14 @@ object SparkILoop {
       if (p == "") None else Some(p)
     }
     val jars = propJars.orElse(envJars).getOrElse("")
-    Utils.resolveURIs(jars).split(",").filter(_.nonEmpty)
+    val resolvedJars = Utils.resolveURIs(jars).split(",").filter(_.nonEmpty)
+    if (Utils.isWindows) {
+      // Strip any URI scheme prefix so we can add the correct path to the classpath
+      // e.g. file:/C:/my/path.jar -> C:/my/path.jar
+      resolvedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
+    } else {
+      resolvedJars
+    }
   }
 
   // Designed primarily for use by test code: take a String with a

From 0049f1b6354851f53993d5b3c9b4301c59fa047c Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 29 Aug 2014 15:31:11 -0700
Subject: [PATCH 2/5] Remove embarrassing log messages

---
 repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 0c49f681263ee..b200ba719bf77 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -176,7 +176,7 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
     }
   }
 
-  class SparkILoopInterpreter extends SparkIMain(settings, out) with Logging {
+  class SparkILoopInterpreter extends SparkIMain(settings, out) {
     outer =>
 
     override lazy val formatting = new Formatting {
@@ -195,9 +195,6 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
       settings.classpath.value)((l, r) => ClassPath.join(l, r))
     this.settings.classpath.value = totalClassPath
 
-    logError("HELLO")
-    logError("MY CLASSPATH = " + settings.classpath.value)
-
     intp = new SparkILoopInterpreter
   }
 

From 42bd6264ab0ea3524b63948995125ad72186ecf5 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 29 Aug 2014 16:11:57 -0700
Subject: [PATCH 3/5] Remove unnecessary code

This was added before we removed the call to `setJars` in SparkILoop.
Back then, `spark.jars` contained the "C:/foo/bar" paths, because we
called `setJars` on these paths. After we removed the call to `setJars`,
`spark.jars` contains the "file:/C:/foo/bar" paths, in which case the
downstream code does not need to be changed. Long story short,
`SparkILoop#getAddedJars` is only used for setting class paths on the
driver side as of this PR.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index b46c0ae091269..86f646d2af181 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -325,14 +325,7 @@ private[spark] object Utils extends Logging {
     val targetFile = new File(targetDir, filename)
     val uri = new URI(url)
     val fileOverwrite = conf.getBoolean("spark.files.overwrite", false)
-    // In Windows, the URL may be the raw path starting with a root drive (e.g. C:/)
-    // We need to format this correctly so that the drive is not interpreted as a URI scheme
-    val formattedURI =
-      uri.getScheme match {
-        case windowsDrive(d) if isWindows => new URI("file:/" + uri)
-        case _ => uri
-      }
-    formattedURI.getScheme match {
+    uri.getScheme match {
       case "http" | "https" | "ftp" =>
         logInfo("Fetching " + url + " to " + tempFile)

From 0d5a0c133ea3995c26c81d06dd55c36afe853865 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Mon, 1 Sep 2014 16:46:59 -0700
Subject: [PATCH 4/5] Format jar path only for adding to shell classpath

This separates the concerns of setting the correct value for
`spark.jars` and of setting the correct classpath for the shell.
---
 .../org/apache/spark/repl/SparkILoop.scala | 21 ++++++++++++++++------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index b200ba719bf77..508462afd49ea 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -966,9 +966,11 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
 
   def createSparkContext(): SparkContext = {
     val execUri = System.getenv("SPARK_EXECUTOR_URI")
+    val jars = SparkILoop.getAddedJars
     val conf = new SparkConf()
       .setMaster(getMaster())
       .setAppName("Spark shell")
+      .setJars(jars)
       .set("spark.repl.class.uri", intp.classServer.uri)
     if (execUri != null) {
       conf.set("spark.executor.uri", execUri)
@@ -1017,14 +1019,7 @@ object SparkILoop {
       if (p == "") None else Some(p)
     }
     val jars = propJars.orElse(envJars).getOrElse("")
-    val resolvedJars = Utils.resolveURIs(jars).split(",").filter(_.nonEmpty)
-    if (Utils.isWindows) {
-      // Strip any URI scheme prefix so we can add the correct path to the classpath
-      // e.g. file:/C:/my/path.jar -> C:/my/path.jar
-      resolvedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
-    } else {
-      resolvedJars
-    }
+    Utils.resolveURIs(jars).split(",").filter(_.nonEmpty)
   }
 
   // Designed primarily for use by test code: take a String with a
@@ -1058,7 +1053,15 @@ object SparkILoop {
     if (settings.classpath.isDefault)
       settings.classpath.value = sys.props("java.class.path")
 
-    getAddedJars.foreach(settings.classpath.append(_))
+    val addedJars =
+      if (Utils.isWindows) {
+        // Strip any URI scheme prefix so we can add the correct path to the classpath
+        // e.g. file:/C:/my/path.jar -> C:/my/path.jar
+        getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
+      } else {
+        getAddedJars
+      }
+    addedJars.foreach(settings.classpath.append)
 
     repl process settings
   }

From 262c6a2cff8438b568e90fae5a1dc04baf656f0c Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Mon, 1 Sep 2014 16:58:10 -0700
Subject: [PATCH 5/5] Oops... Add the new code to the correct place

---
 .../org/apache/spark/repl/SparkILoop.scala | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 508462afd49ea..d9eeffa86016a 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -190,8 +190,16 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
     require(settings != null)
 
     if (addedClasspath != "") settings.classpath.append(addedClasspath)
+    val addedJars =
+      if (Utils.isWindows) {
+        // Strip any URI scheme prefix so we can add the correct path to the classpath
+        // e.g. file:/C:/my/path.jar -> C:/my/path.jar
+        SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
+      } else {
+        SparkILoop.getAddedJars
+      }
     // work around for Scala bug
-    val totalClassPath = SparkILoop.getAddedJars.foldLeft(
+    val totalClassPath = addedJars.foldLeft(
       settings.classpath.value)((l, r) => ClassPath.join(l, r))
     this.settings.classpath.value = totalClassPath
 
@@ -1053,15 +1061,7 @@ object SparkILoop {
     if (settings.classpath.isDefault)
      settings.classpath.value = sys.props("java.class.path")
 
-    val addedJars =
-      if (Utils.isWindows) {
-        // Strip any URI scheme prefix so we can add the correct path to the classpath
-        // e.g. file:/C:/my/path.jar -> C:/my/path.jar
-        getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
-      } else {
-        getAddedJars
-      }
-    addedJars.foreach(settings.classpath.append)
+    getAddedJars.foreach(settings.classpath.append(_))
 
     repl process settings
   }
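
The net effect of the five patches: `spark.jars` keeps the well-formed
`file:/C:/...` URIs that `Utils.resolveURIs` produces, and only the
shell-classpath code strips the scheme back off, because the JVM classpath
expects plain `C:/...` paths on Windows. The standalone sketch below
illustrates that round trip. It is not part of the patch series:
`WindowsJarPathSketch`, `normalizeToURI`, and `toClasspathEntry` are
illustrative names, and `normalizeToURI` only approximates what
`Utils.resolveURIs` does for a single local path.

  import java.net.URI

  object WindowsJarPathSketch {

    // Rough stand-in for Utils.resolveURIs on one local path: give the path
    // a file: scheme so it is a well-formed URI. On Windows, a drive letter
    // like "C:" would otherwise be parsed as a URI scheme.
    def normalizeToURI(path: String, isWindows: Boolean): String = {
      val p = path.replace('\\', '/')
      if (isWindows && p.matches("[a-zA-Z]:/.*")) {
        "file:/" + p                      // drive letter, not a real scheme
      } else if (new URI(p).getScheme == null) {
        "file:" + p                       // absolute local path
      } else {
        p                                 // already a proper URI (file:, hdfs:, ...)
      }
    }

    // The inverse step the final patch applies before appending to the
    // shell classpath: file:/C:/my/path.jar -> C:/my/path.jar
    def toClasspathEntry(jar: String, isWindows: Boolean): String =
      if (isWindows) new URI(jar).getPath.stripPrefix("/") else jar

    def main(args: Array[String]): Unit = {
      val resolved = normalizeToURI("C:\\my\\path.jar", isWindows = true)
      println(resolved)                                      // file:/C:/my/path.jar
      println(toClasspathEntry(resolved, isWindows = true))  // C:/my/path.jar
    }
  }

Keeping the strip step next to the classpath append, rather than inside
`getAddedJars` (the placement PATCH 4 and 5 settle on), means `spark.jars`
and the shell classpath can use different formats for the same jar without
interfering with each other.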