From a2262cdb7aa30e9f45043f1440d4b02bc3340f9f Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Mon, 5 May 2014 22:14:47 -0700
Subject: [PATCH] [SPARK-1735] Add the missing special profiles to make-distribution.sh

73b0cbcc241cca3d318ff74340e80b02f884acbd introduced a few special profiles that are not covered in the `make-distribution.sh`. This affects hadoop versions 2.2.x, 2.3.x, and 2.4.x. Without these special profiles, a java version error for protobufs is thrown at run time.

I took the opportunity to rewrite the way we construct the maven command. Previously, the only hadoop version that triggered the `yarn-alpha` profile was 0.23.x, which was inconsistent with the [docs](https://github.com/apache/spark/blob/master/docs/building-with-maven.md). This is now generalized to hadoop versions from 0.23.x to 2.1.x.

Author: Andrew Or

Closes #660 from andrewor14/hadoop-distribution and squashes the following commits:

6740126 [Andrew Or] Generalize the yarn profile to hadoop versions 2.2+
88f192d [Andrew Or] Add the required special profiles to make-distribution.sh
---
 make-distribution.sh | 47 +++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/make-distribution.sh b/make-distribution.sh
index ff18d01e7a616..8a63133bc45d4 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -47,7 +47,7 @@ set -o pipefail
 VERSION=$(mvn help:evaluate -Dexpression=project.version 2>/dev/null | grep -v "INFO" | tail -n 1)
 if [ $? != 0 ]; then
     echo -e "You need Maven installed to build Spark."
-    echo -e "Download Maven from https://maven.apache.org."
+    echo -e "Download Maven from https://maven.apache.org/"
     exit -1;
 fi
 
@@ -131,27 +131,34 @@ cd $FWDIR
 
 export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
 
-if [ "$SPARK_HIVE" == "true" ]; then
-  MAYBE_HIVE="-Phive"
-else
-  MAYBE_HIVE=""
-fi
-
-if [ "$SPARK_YARN" == "true" ]; then
-  if [[ "$SPARK_HADOOP_VERSION" =~ "0.23." ]]; then
-    mvn clean package -DskipTests -Pyarn-alpha -Dhadoop.version=$SPARK_HADOOP_VERSION \
-      -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE -Phadoop-0.23
-  else
-    mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
-      -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
-  fi
-else
-  if [[ "$SPARK_HADOOP_VERSION" =~ "0.23." ]]; then
-    mvn clean package -Phadoop-0.23 -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
-  else
-    mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+BUILD_COMMAND="mvn clean package"
+
+# Use special profiles for hadoop versions 0.23.x, 2.2.x, 2.3.x, 2.4.x
+if [[ "$SPARK_HADOOP_VERSION" =~ ^0\.23\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-0.23"; fi
+if [[ "$SPARK_HADOOP_VERSION" =~ ^2\.2\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-2.2"; fi
+if [[ "$SPARK_HADOOP_VERSION" =~ ^2\.3\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-2.3"; fi
+if [[ "$SPARK_HADOOP_VERSION" =~ ^2\.4\. ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phadoop-2.4"; fi
+if [[ "$SPARK_HIVE" == "true" ]]; then BUILD_COMMAND="$BUILD_COMMAND -Phive"; fi
+if [[ "$SPARK_YARN" == "true" ]]; then
+  # For hadoop versions 0.23.x to 2.1.x, use the yarn-alpha profile
+  if [[ "$SPARK_HADOOP_VERSION" =~ ^0\.2[3-9]\. ]] ||
+     [[ "$SPARK_HADOOP_VERSION" =~ ^0\.[3-9][0-9]\. ]] ||
+     [[ "$SPARK_HADOOP_VERSION" =~ ^1\.[0-9]\. ]] ||
+     [[ "$SPARK_HADOOP_VERSION" =~ ^2\.[0-1]\. ]]; then
+    BUILD_COMMAND="$BUILD_COMMAND -Pyarn-alpha"
+  # For hadoop versions 2.2+, use the yarn profile
+  elif [[ "$SPARK_HADOOP_VERSION" =~ ^2.[2-9]. ]]; then
+    BUILD_COMMAND="$BUILD_COMMAND -Pyarn"
   fi
+  BUILD_COMMAND="$BUILD_COMMAND -Dyarn.version=$SPARK_HADOOP_VERSION"
 fi
+BUILD_COMMAND="$BUILD_COMMAND -Dhadoop.version=$SPARK_HADOOP_VERSION"
+BUILD_COMMAND="$BUILD_COMMAND -DskipTests"
+
+# Actually build the jar
+echo -e "\nBuilding with..."
+echo -e "\$ $BUILD_COMMAND\n"
+${BUILD_COMMAND}
 
 # Make directories
 rm -rf "$DISTDIR"