Skip to content
Permalink
Browse files

[SPARK-10878][CORE] Fix race condition when multiple clients resolves…

… artifacts at the same time

## What changes were proposed in this pull request?

When multiple clients attempt to resolve artifacts via the `--packages` parameter, they can run into a race condition, since each attempts to modify the same dummy `org.apache.spark-spark-submit-parent-default.xml` file created in the default ivy cache dir.
This PR changes the behavior to encode UUID in the dummy module descriptor so each client will operate on a different resolution file in the ivy cache dir. In addition, this patch changes the behavior of when and which resolution files are cleaned to prevent accumulation of resolution files in the default ivy cache dir.

Since this PR is a successor of #18801, close #18801. Much of the code was ported from #18801. **A lot of effort was put in there; I think this PR should be credited to Victsm.**

## How was this patch tested?

Added a unit test to `SparkSubmitUtilsSuite`.

Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>

Closes #21251 from kiszk/SPARK-10878.

(cherry picked from commit d3c426a)
Signed-off-by: Marcelo Vanzin <vanzin@cloudera.com>
  • Loading branch information...
kiszk authored and vanzin committed May 10, 2018
1 parent 4c49b12 commit 414e4e3d70caa950a63fab1c8cac3314fb961b0c
@@ -22,6 +22,7 @@ import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowab
import java.net.URL
import java.security.PrivilegedExceptionAction
import java.text.ParseException
import java.util.UUID

import scala.annotation.tailrec
import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
@@ -1209,7 +1210,33 @@ private[spark] object SparkSubmitUtils {

/** A nice function to use in tests as well. Values are dummy strings. */
def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance(
  // Include UUID in module name, so multiple clients resolving maven coordinate at the same time
  // do not modify the same resolution file concurrently.
  ModuleRevisionId.newInstance("org.apache.spark",
    s"spark-submit-parent-${UUID.randomUUID.toString}",
    "1.0"))

/**
 * Clear ivy resolution from current launch. The resolution file is usually at
 * ~/.ivy2/org.apache.spark-spark-submit-parent-$UUID-default.xml,
 * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.xml, and
 * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$UUID-1.0.properties.
 * Since each launch will have its own resolution files created, delete them after
 * each resolution to prevent accumulation of these files in the ivy cache dir.
 */
private def clearIvyResolutionFiles(
    mdId: ModuleRevisionId,
    ivySettings: IvySettings,
    ivyConfName: String): Unit = {
  // All three per-launch files share the "<org>-<module>" prefix; build it once.
  val modulePrefix = s"${mdId.getOrganisation}-${mdId.getName}"
  val resolutionFileNames = Seq(
    s"$modulePrefix-$ivyConfName.xml",
    s"resolved-$modulePrefix-${mdId.getRevision}.xml",
    s"resolved-$modulePrefix-${mdId.getRevision}.properties"
  )
  // Best-effort deletion: delete() returning false (file absent) is fine here.
  for (name <- resolutionFileNames) {
    new File(ivySettings.getDefaultCache, name).delete()
  }
}

/**
* Resolves any dependencies that were supplied through maven coordinates
@@ -1260,14 +1287,6 @@ private[spark] object SparkSubmitUtils {

// A Module descriptor must be specified. Entries are dummy strings
val md = getModuleDescriptor
// clear ivy resolution from previous launches. The resolution file is usually at
// ~/.ivy2/org.apache.spark-spark-submit-parent-default.xml. In between runs, this file
// leads to confusion with Ivy when the files can no longer be found at the repository
// declared in that file/
val mdId = md.getModuleRevisionId
val previousResolution = new File(ivySettings.getDefaultCache,
s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml")
if (previousResolution.exists) previousResolution.delete

md.setDefaultConf(ivyConfName)

@@ -1288,7 +1307,10 @@ private[spark] object SparkSubmitUtils {
packagesDirectory.getAbsolutePath + File.separator +
"[organization]_[artifact]-[revision](-[classifier]).[ext]",
retrieveOptions.setConfs(Array(ivyConfName)))
resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
val paths = resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
val mdId = md.getModuleRevisionId
clearIvyResolutionFiles(mdId, ivySettings, ivyConfName)
paths
} finally {
System.setOut(sysOut)
}
@@ -256,4 +256,19 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
assert(jarPath.indexOf("mydep") >= 0, "should find dependency")
}
}

test("SPARK-10878: test resolution files cleaned after resolving artifact") {
  val main = new MavenCoordinate("my.great.lib", "mylib", "0.1")

  IvyTestUtils.withRepository(main, None, None) { repo =>
    val ivySettings = SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath))
    SparkSubmitUtils.resolveMavenCoordinates(
      main.toString,
      ivySettings,
      isTest = true)
    // After resolution, no per-launch resolution files may remain in the cache dir.
    val resolutionFilePattern = """.*org.apache.spark-spark-submit-parent-.*""".r
    val cacheFileNames = ivySettings.getDefaultCache.listFiles.map(_.getName)
    assert(cacheFileNames.forall(resolutionFilePattern.findFirstIn(_).isEmpty),
      "resolution files should be cleaned")
  }
}
}

0 comments on commit 414e4e3

Please sign in to comment.
You can’t perform that action at this time.