Skip to content

Commit

Permalink
added maven dependency download
Browse files Browse the repository at this point in the history
  • Loading branch information
brkyvz committed Jan 26, 2015
1 parent 7450a99 commit 882c4c8
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 3 deletions.
116 changes: 116 additions & 0 deletions bin/resolve-maven-coordinates.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Path of the user's Maven settings file (note: a file, despite the "_DIR" suffix) and the
# default local repository that is used unless the settings file overrides it.
MAVEN_SETTINGS_DIR=~/.m2/settings.xml
MAVEN_LOCAL_REPO=~/.m2/repository

# Read user maven configurations if there are any. Update path to local repository if needed.
if [ -e "$MAVEN_SETTINGS_DIR" ]; then
  # Pull out the text between <localRepository> and </localRepository> (first match, element on
  # a single line, surrounding whitespace trimmed). sed prints nothing when the element is
  # absent, in which case the default above must be kept rather than replaced by an empty path.
  # Maven property placeholders such as ${user.home} inside the value are NOT expanded here.
  alternateRepo=$(sed -n \
    's|.*<localRepository>[[:space:]]*\(.*[^[:space:]]\)[[:space:]]*</localRepository>.*|\1|p' \
    "$MAVEN_SETTINGS_DIR" | head -n 1)
  if [ -n "$alternateRepo" ]; then
    MAVEN_LOCAL_REPO=$alternateRepo
  fi
fi

# Given the path of a file in the local Maven repo, prints that path with everything up to and
# including the first '/' removed (for an absolute path: just the leading slash) and with the
# final '.<extension>' removed.
#   Example: /repo/g/a/1.0/a-1.0.jar  ->  repo/g/a/1.0/a-1.0
# NOTE: the result is therefore a slash-less path stem, not a bare "artifactId-version";
# removeSparkDependency prepends '/' and appends '.pom' to it to locate the artifact's POM.
#   $1 - path to the artifact
computeArtifactName() {
  local coordinatePath=$1
  local stripPrefix=${coordinatePath#*/}
  # Quoted so that whitespace and glob characters in the path are printed verbatim.
  printf '%s\n' "${stripPrefix%.*}"
}

# Given a maven coordinate, prints the path to the corresponding jar in the local Maven repo
# (rooted at $MAVEN_LOCAL_REPO).
# Currently requires the coordinate to be in the form `groupId:artifactId:version`.
#   $1 - the maven coordinate
# On a malformed coordinate nothing is written to stdout, a diagnostic goes to stderr and the
# (sub)shell exits with status 1. Callers normally invoke this inside $(...), so diagnostics
# must not go to stdout or they would be mistaken for the resulting path.
computeLocalPath() {
  local coordinate=$1
  local -a split
  # Split on ':' without pathname expansion of the individual parts.
  IFS=':' read -r -a split <<< "$coordinate"
  if [ "${#split[@]}" -ne 3 ] || [ -z "${split[0]}" ] || [ -z "${split[1]}" ] || \
     [ -z "${split[2]}" ]; then
    echo "Provided Maven Coordinates must be in the form 'groupId:artifactId:version'." >&2
    echo "The coordinate provided is: $coordinate" >&2
    exit 1
  fi
  # The dots of the groupId become directory separators in the repository layout.
  local groupId=${split[0]//.//}
  local artifactId=${split[1]}
  local version=${split[2]}
  echo "$MAVEN_LOCAL_REPO/$groupId/$artifactId/$version/$artifactId-$version.jar"
}

# Remember the directory the script was started from; the script changes back to it with `cd`
# just before printing its result.
CUR_DIR=$PWD

# Removes dependency on Spark (if there is one).
# Reads "/<artifactName>.pom" and writes a copy to ./pom.xml (the file the maven plugin will
# use) in which every <dependency> element that mentions the org.apache.spark groupId ends with
# <scope>provided</scope>.
#   $1 - artifact path stem without the leading '/' and without extension
# Limitations: works line by line, so it expects one XML element per line; a <scope> that is
# listed before the <groupId> of a Spark dependency is not removed.
removeSparkDependency() {
  local artifactName=$1
  local line
  local inDependency=false
  local inSpark=false
  # Trace of the artifacts that were processed (written to the current directory).
  echo "$artifactName" >> log.txt
  {
    # `|| [ -n "$line" ]` also processes a final line that has no trailing newline: `read`
    # returns non-zero there but still fills $line.
    while IFS= read -r line || [ -n "$line" ]; do
      if [[ $line == *"<dependency>"* ]]; then
        inDependency=true
        inSpark=false
      fi
      # Only a groupId inside a <dependency> counts; the project's own or its parent's groupId
      # must not cause the next, unrelated dependency to be marked as provided.
      if [[ $inDependency == true && $line == *"<groupId>org.apache.spark"* ]]; then
        inSpark=true
      fi
      # Drop a scope the Spark dependency already declares so the element is not duplicated.
      if [[ $inSpark == true && $line == *"<scope>"* ]]; then
        continue
      fi
      if [[ $line == *"</dependency>"* ]]; then
        if [[ $inSpark == true ]]; then
          printf '%s\n' "<scope>provided</scope>"
        fi
        inDependency=false
        inSpark=false
      fi
      # printf with a quoted argument: an unquoted echo would glob-expand '*' found in the POM.
      printf '%s\n' "$line"
    done < "/$artifactName.pom"
  } > pom.xml
}

# Recursive function that gets the first level of dependencies of each maven coordinate.
# We use a recursive function so that if any of the transitive dependencies are Spark, we don't
# include anything related to it in the classpath.
#   $1 - path to an artifact in the local Maven repo (an empty value prints nothing)
# Prints a ':'-separated classpath consisting of the artifact itself followed by the result of
# recursing into each of its direct, non-provided dependencies.
# Side effects: changes into the artifact's directory and (re)writes cp.txt and pom.xml there.
# Callers invoke this inside $(...), so the `cd` is confined to that subshell and ONLY the
# classpath may be written to stdout.
addDependenciesToClasspath() {
  local pathOfArtifact=$1
  if [ -z "$pathOfArtifact" ]; then
    return 0
  fi

  local artifactName mavenPath depClasspath dep depPath
  local -a depList
  artifactName=$(computeArtifactName "$pathOfArtifact")
  if ! cd "${pathOfArtifact%/*}"; then
    echo "Cannot change into the directory of $pathOfArtifact" >&2
    return 1
  fi
  mavenPath=$pathOfArtifact
  # Start from an empty file so a failed Maven run yields "no dependencies".
  > cp.txt
  removeSparkDependency "$artifactName"
  # Maven logs to stdout; send that to stderr so it does not end up inside the classpath.
  mvn dependency:build-classpath -Dmdep.outputFile=cp.txt -DexcludeScope=provided \
    -DexcludeTransitive=true >&2
  # Join possible multiple lines with ':' and split without word splitting or globbing.
  depClasspath=$(tr '\n' ':' < cp.txt)
  IFS=':' read -r -a depList <<< "$depClasspath"
  for dep in "${depList[@]}"; do
    [ -n "$dep" ] || continue
    depPath=$(addDependenciesToClasspath "$dep")
    if [ -n "$depPath" ]; then
      mavenPath="$mavenPath:$depPath"
    fi
  done
  printf '%s\n' "$mavenPath"
}

# The path to jars in the local maven repo that will be appended to the classpath.
# Input:  SPARK_SUBMIT_MAVEN_COORDINATES - comma-separated `groupId:artifactId:version` list
#         SPARK_SUBMIT_MAVEN_REPOS       - optional additional remote repositories
# Output: the resulting classpath is the last thing echoed by this script.
mavenClasspath=""
if [ -n "$SPARK_SUBMIT_MAVEN_COORDINATES" ]; then
  # Split on commas (and blanks) without pathname expansion.
  IFS=', ' read -r -a coordinateList <<< "$SPARK_SUBMIT_MAVEN_COORDINATES"
  for i in "${coordinateList[@]}"; do
    [ -n "$i" ] || continue
    # computeLocalPath fails on a malformed coordinate; stop instead of carrying on with a
    # bogus path. Anything it printed to stdout in that case is a diagnostic, so forward it.
    if ! localPath=$(computeLocalPath "$i"); then
      if [ -n "$localPath" ]; then
        echo "$localPath" >&2
      fi
      exit 1
    fi
    # if jar doesn't exist, download it and all its dependencies (except Spark)
    # ($localPath already ends in ".jar")
    if [ ! -e "$localPath" ]; then
      mvnGetArgs=(dependency:get "-Dartifact=$i" -Dtransitive=false)
      # Only pass remote repositories when some were given.
      if [ -n "$SPARK_SUBMIT_MAVEN_REPOS" ]; then
        mvnGetArgs+=("-DremoteRepositories=$SPARK_SUBMIT_MAVEN_REPOS")
      fi
      # NOTE(review): stdout appears to be reserved for the classpath echoed below, so Maven's
      # log output is sent to stderr - confirm against the script that consumes this output.
      mvn "${mvnGetArgs[@]}" >&2
    fi
    # add all dependencies of this jar to the classpath
    mavenClasspath="$mavenClasspath:$(addDependenciesToClasspath "$localPath")"
  done
fi

cd "$CUR_DIR"

# Drop the leading ':' left over from building the list.
echo "${mavenClasspath#:}"
2 changes: 2 additions & 0 deletions bin/spark-class
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ else
CLASSPATH="$classpath_output"
fi



if [[ "$1" =~ org.apache.spark.tools.* ]]; then
if test -z "$SPARK_TOOLS_JAR"; then
echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/" 1>&2
Expand Down
4 changes: 4 additions & 0 deletions bin/spark-submit
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ while (($#)); do
export SPARK_SUBMIT_OPTS=$2
elif [ "$1" = "--master" ]; then
export MASTER=$2
elif [ "$1" = "--maven" ]; then
export SPARK_SUBMIT_MAVEN_COORDINATES=$2
elif [ "$1" = "--maven_repos" ]; then
export SPARK_SUBMIT_MAVEN_REPOS=$2
fi
shift
done
Expand Down
6 changes: 3 additions & 3 deletions bin/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ function gatherSparkSubmitOpts() {
exit 1
fi

# NOTE: If you add or remove spark-sumbmit options,
# NOTE: If you add or remove spark-submit options,
# modify NOT ONLY this script but also SparkSubmitArgument.scala
SUBMISSION_OPTS=()
APPLICATION_OPTS=()
while (($#)); do
case "$1" in
--master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
--conf | --properties-file | --driver-memory | --driver-java-options | \
--master | --deploy-mode | --class | --name | --jars | --maven | --py-files | --files | \
--conf | --maven_repos | --properties-file | --driver-memory | --driver-java-options | \
--driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
--total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
if [[ $# -lt 2 ]]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
var name: String = null
var childArgs: ArrayBuffer[String] = new ArrayBuffer[String]()
var jars: String = null
var maven: String = null
var maven_repos: String = null
var verbose: Boolean = false
var isPython: Boolean = false
var pyFiles: String = null
Expand Down Expand Up @@ -224,6 +226,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
| name $name
| childArgs [${childArgs.mkString(" ")}]
| jars $jars
| maven $maven
| verbose $verbose
|
|Spark properties used, including those specified through
Expand Down Expand Up @@ -330,6 +333,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
jars = Utils.resolveURIs(value)
parse(tail)

case ("--maven") :: value :: tail =>
maven = value
parse(tail)

case ("--maven_repos") :: value :: tail =>
maven_repos = value
parse(tail)

case ("--conf" | "-c") :: value :: tail =>
value.split("=", 2).toSeq match {
case Seq(k, v) => sparkProperties(k) = v
Expand Down Expand Up @@ -380,6 +391,12 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
| --name NAME A name of your application.
| --jars JARS Comma-separated list of local jars to include on the driver
| and executor classpaths.
| --maven Comma-separated list of maven coordinates of jars to include
| on the driver and executor classpaths. Will search the local
| maven repo, then maven central and any additional remote
| repositories given by --maven_repos.
| --maven_repos Supply additional remote repositories to search for the
| maven coordinates given with --maven.
| --py-files PY_FILES Comma-separated list of .zip, .egg, or .py files to place
| on the PYTHONPATH for Python apps.
| --files FILES Comma-separated list of files to be placed in the working
Expand Down
2 changes: 2 additions & 0 deletions core/src/main/scala/org/apache/spark/util/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1873,6 +1873,8 @@ private[spark] object Utils extends Logging {
}
}

/**
 * Holds three string fields: a group id, an artifact id and a version.
 * NOTE(review): presumably represents a `groupId:artifactId:version` coordinate as accepted by
 * `--maven`; no usages are visible here, confirm against callers.
 */
private[spark] case class MavenCoordinates(groupId: String, artifactId: String, version: String)

/**
* A utility class to redirect the child process's stdout or stderr.
*/
Expand Down

0 comments on commit 882c4c8

Please sign in to comment.