Skip to content

Commit

Permalink
BIGTOP-1944. Upgrade to Spark 1.5.1
Browse files Browse the repository at this point in the history
This includes the following changes:
* upgrading to 1.5.1 in the BOM
* splitting out some jars to separate packages so that they can be installed
  individually on the appropriate nodes:
** the datanucleus jars
** the Spark YARN Shuffle service jar
** Spark "extras" jars like external connectors
  • Loading branch information
ejono committed Oct 6, 2015
1 parent 5bc4577 commit 50097b7
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 67 deletions.
15 changes: 13 additions & 2 deletions bigtop-packages/src/common/spark/do-component-build
Expand Up @@ -24,9 +24,20 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
-Dhadoop.version=$HADOOP_VERSION \
-Dyarn.version=$HADOOP_VERSION \
-Dprotobuf.version=2.5.0 \
-DskipTests -DrecompileMode=all"
-DrecompileMode=all \
-Pbigtop-dist \
-Pyarn -Phadoop-2.6 \
-Phive -Phive-thriftserver \
$SPARK_BUILD_OPTS"

## this might be an issue at times
# http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"

mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
mvn $BUILD_OPTS -DskipTests install

# Tests must be run after Spark has already been packaged.
# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
if [ "$SPARK_RUN_TESTS" = "true" ]; then
mvn $BUILD_OPTS test
fi
79 changes: 23 additions & 56 deletions bigtop-packages/src/common/spark/install_spark.sh
Expand Up @@ -119,6 +119,10 @@ install -d -m 0755 $PREFIX/$LIB_DIR
install -d -m 0755 $PREFIX/$LIB_DIR/lib
install -d -m 0755 $PREFIX/$LIB_DIR/bin
install -d -m 0755 $PREFIX/$LIB_DIR/sbin
install -d -m 0755 $PREFIX/$LIB_DIR/extras
install -d -m 0755 $PREFIX/$LIB_DIR/extras/lib
install -d -m 0755 $PREFIX/$LIB_DIR/yarn
install -d -m 0755 $PREFIX/$LIB_DIR/yarn/lib
install -d -m 0755 $PREFIX/$DOC_DIR
install -d -m 0755 $PREFIX/$EXAMPLES_DIR

Expand All @@ -134,9 +138,15 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass

rm -rf $PREFIX/$LIB_DIR/bin/*.cmd

# External/extra jars
ls ${BUILD_DIR}/{external,extras}/*/target/*${SPARK_VERSION}.jar | grep -v 'original-\|assembly' | xargs -IJARS cp JARS $PREFIX/$LIB_DIR/extras/lib

# Examples jar
cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar

# Spark YARN Shuffle jar
cp ${BUILD_DIR}/network/yarn/target/*/spark-${SPARK_VERSION}-yarn-shuffle.jar $PREFIX/$LIB_DIR/lib/

# Examples src
cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
Expand All @@ -150,12 +160,12 @@ chmod 755 $PREFIX/$LIB_DIR/sbin/*
# Copy in the configuration files
install -d -m 0755 $PREFIX/$CONF_DIR
cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
cp $PREFIX/$CONF_DIR/spark-env.sh.template $PREFIX/$CONF_DIR/spark-env.sh
cp $SOURCE_DIR/spark-env.sh $PREFIX/$CONF_DIR
ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf

# Copy in the wrappers
install -d -m 0755 $PREFIX/$BIN_DIR
for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
for wrap in bin/spark-class bin/spark-shell bin/spark-sql bin/spark-submit; do
cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
#!/bin/bash
Expand All @@ -167,60 +177,6 @@ EOF
chmod 755 $PREFIX/$BIN_DIR/`basename $wrap`
done

cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
export SPARK_MASTER_IP=\`hostname\`
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=18080
export SPARK_WORKER_PORT=7078
export SPARK_WORKER_WEBUI_PORT=18081
export SPARK_WORKER_DIR=/var/run/spark/work
export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
export SPARK_LOG_DIR=/var/log/spark
export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\${HADOOP_HOME}/lib/native
EOF

cat >> $PREFIX/$CONF_DIR/hive-site.xml <<EOF
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby:;databaseName=/tmp/spark-\${user.name}/metastore_db;create=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
</configuration>
EOF

ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work

cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
Expand All @@ -242,3 +198,14 @@ cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/

# Version-less symlinks
(cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
# Version-less symlinks for the jars that were split out into the
# spark-yarn-shuffle and spark-datanucleus packages.
pushd "$PREFIX/$LIB_DIR/yarn/lib"
ln -s ../../lib/spark-*-yarn-shuffle.jar spark-yarn-shuffle.jar
ln -s ../../lib/datanucleus-api-jdo*.jar datanucleus-api-jdo.jar
ln -s ../../lib/datanucleus-core*.jar datanucleus-core.jar
ln -s ../../lib/datanucleus-rdbms*.jar datanucleus-rdbms.jar
popd
# Version-less symlinks for the extras jars, e.g.
#   spark-streaming-kafka_2.10-1.5.1.jar -> spark-streaming-kafka.jar
pushd "$PREFIX/$LIB_DIR/extras/lib"
for j in *.jar; do  # glob instead of parsing `ls` output
  # Strip the "_<scalaver>-<sparkver>" suffix. `sed -n ... p` prints nothing
  # when the name does not match; skip those jars instead of letting
  # single-operand `ln -s` create a broken self-link.
  target=$(echo "$j" | sed -n 's/\(.*\)\(_[0-9.]\+-[0-9.]\+\)\(.jar\)/\1\3/p')
  if [ -n "$target" ]; then
    ln -s "$j" "$target"
  fi
done
popd
43 changes: 43 additions & 0 deletions bigtop-packages/src/common/spark/spark-env.sh
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Environment defaults for Apache Spark as packaged by Bigtop.
# This file is sourced by the Spark launcher scripts; any variable may be
# overridden by exporting it before this file is read (the ${VAR:-default}
# pattern keeps pre-set values).

export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}

# Hadoop installation layout (Bigtop default filesystem locations).
export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}

# Let's run everything with JVM runtime, instead of Scala
export SPARK_LAUNCH_WITH_SCALA=0
export SPARK_LIBRARY_PATH=${SPARK_LIBRARY_PATH:-${SPARK_HOME}/lib}
export SCALA_LIBRARY_PATH=${SCALA_LIBRARY_PATH:-${SPARK_HOME}/lib}

# Let's make sure that all needed hadoop libs are added properly
export CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
# Also pick up Hadoop's native libraries (e.g. libhadoop.so).
export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native

# Standalone-mode master settings ($() instead of legacy backticks).
export STANDALONE_SPARK_MASTER_HOST=$(hostname -f)
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=18080

# Standalone-mode worker settings.
export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
export SPARK_WORKER_PORT=7078
export SPARK_WORKER_WEBUI_PORT=18081

# History server: read application event logs from HDFS, UI on port 18082.
export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
15 changes: 15 additions & 0 deletions bigtop-packages/src/deb/spark/control
Expand Up @@ -60,3 +60,18 @@ Architecture: all
Depends: spark-core (= ${source:Version})
Description: Thrift server for Spark SQL
Thrift server for Spark SQL

Package: spark-datanucleus
Architecture: all
Description: DataNucleus libraries for Apache Spark
DataNucleus libraries used by Spark SQL with Hive Support

Package: spark-extras
Architecture: all
Description: External/extra libraries for Apache Spark
External/extra libraries built for Apache Spark but not included in the main assembly JAR (e.g., external streaming libraries)

Package: spark-yarn-shuffle
Architecture: all
Description: Spark YARN Shuffle Service
Spark YARN Shuffle Service
14 changes: 7 additions & 7 deletions bigtop-packages/src/deb/spark/spark-core.install
@@ -1,26 +1,26 @@
/etc/spark
/usr/bin/spark-executor
/usr/bin/spark-class
/usr/bin/spark-submit
/usr/bin/spark-shell
/usr/bin/spark-sql
/usr/lib/spark/LICENSE
/usr/lib/spark/RELEASE
/usr/lib/spark/NOTICE
/usr/lib/spark/bin/beeline
/usr/lib/spark/bin/compute-classpath.sh
/usr/lib/spark/bin/load-spark-env.sh
/usr/lib/spark/bin/run-example
/usr/lib/spark/bin/spark-class
/usr/lib/spark/bin/spark-shell
/usr/lib/spark/bin/spark-sql
/usr/lib/spark/bin/spark-submit
/usr/lib/spark/bin/utils.sh
/usr/lib/spark/conf
/usr/lib/spark/data
/usr/lib/spark/examples
/usr/lib/spark/lib/spark-assembly*.jar
/usr/lib/spark/lib/spark-examples*.jar
/usr/lib/spark/sbin
/usr/lib/spark/work
/usr/lib/spark/lib
/usr/lib/spark/examples
/usr/lib/spark/data
/usr/share/doc/spark
/usr/share/doc/spark*
/var/lib/spark/
/var/log/spark/
/var/run/spark/
Expand Down
2 changes: 2 additions & 0 deletions bigtop-packages/src/deb/spark/spark-datanucleus.install
@@ -0,0 +1,2 @@
/usr/lib/spark/lib/datanucleus-*.jar
/usr/lib/spark/yarn/lib/datanucleus-*.jar
1 change: 1 addition & 0 deletions bigtop-packages/src/deb/spark/spark-extras.install
@@ -0,0 +1 @@
/usr/lib/spark/extras/lib
2 changes: 2 additions & 0 deletions bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
@@ -0,0 +1,2 @@
/usr/lib/spark/lib/spark-*-yarn-shuffle.jar
/usr/lib/spark/yarn/lib/spark-yarn-shuffle.jar
41 changes: 40 additions & 1 deletion bigtop-packages/src/rpm/spark/SPECS/spark.spec
Expand Up @@ -119,6 +119,28 @@ Requires: spark-core = %{version}-%{release}
%description -n spark-thriftserver
Thrift server for Spark SQL

%package -n spark-datanucleus
Summary: DataNucleus libraries for Apache Spark
Group: Development/Libraries

%description -n spark-datanucleus
DataNucleus libraries used by Spark SQL with Hive Support

%package -n spark-extras
Summary: External/extra libraries for Apache Spark
Group: Development/Libraries

%description -n spark-extras
External/extra libraries built for Apache Spark but not included in the main
assembly JAR (e.g., external streaming libraries)

%package -n spark-yarn-shuffle
Summary: Spark YARN Shuffle Service
Group: Development/Libraries

%description -n spark-yarn-shuffle
Spark YARN Shuffle Service

%prep
%setup -n %{spark_name}-%{spark_base_version}

Expand Down Expand Up @@ -175,6 +197,8 @@ done
%{lib_spark}/NOTICE
%{lib_spark}/bin
%{lib_spark}/lib
%exclude %{lib_spark}/lib/datanucleus-*.jar
%exclude %{lib_spark}/lib/spark-*-yarn-shuffle.jar
%{lib_spark}/sbin
%{lib_spark}/data
%{lib_spark}/examples
Expand All @@ -185,8 +209,9 @@ done
%attr(0755,spark,spark) %{var_lib_spark}
%attr(0755,spark,spark) %{var_run_spark}
%attr(0755,spark,spark) %{var_log_spark}
%{bin}/spark-class
%{bin}/spark-shell
%{bin}/spark-executor
%{bin}/spark-sql
%{bin}/spark-submit

%files -n spark-python
Expand All @@ -195,6 +220,20 @@ done
%attr(0755,root,root) %{lib_spark}/bin/pyspark
%{lib_spark}/python

%files -n spark-datanucleus
%defattr(-,root,root,755)
%{lib_spark}/lib/datanucleus-*.jar
%{lib_spark}/yarn/lib/datanucleus-*.jar

%files -n spark-extras
%defattr(-,root,root,755)
%{lib_spark}/extras

%files -n spark-yarn-shuffle
%defattr(-,root,root,755)
%{lib_spark}/lib/spark-*-yarn-shuffle.jar
%{lib_spark}/yarn/lib/spark-yarn-shuffle.jar

%define service_macro() \
%files -n %1 \
%attr(0755,root,root)/%{initd_dir}/%1 \
Expand Down
2 changes: 1 addition & 1 deletion bigtop.bom
Expand Up @@ -282,7 +282,7 @@ bigtop {
name = 'spark'
pkg = 'spark-core'
relNotes = 'Apache Spark'
version { base = '1.3.1'; pkg = base; release = 1 }
version { base = '1.5.1'; pkg = base; release = 1 }
tarball { destination = "$name-${version.base}.tar.gz"
source = "$name-${version.base}.tgz" }
url { download_path = "/$name/$name-${version.base}"
Expand Down

0 comments on commit 50097b7

Please sign in to comment.