Browse files

Include dependencies with Hadoop when publishing jar to maven

Summary: As external dependencies are added to Hadoop, we also have to add them to HBase, because the Hadoop .pom file we publish to the local Maven repository does not contain dependencies. Here I am trying to use the makepom target to auto-generate those dependencies from ivy. Also, I am setting Hadoop version to 0.20. I will double-check that this works correctly with build_all.sh in the warehouse branch.

Test Plan:
In Hadoop directory:
ant clean
copy-hdfs-jars-to-maven.sh

In HBase directory:
Build HBase

In the warehouse branch checkout:
./build_all.sh --hadoop=true

Reviewers: pritam, avf, weiyan, sdong, dms

Reviewed By: pritam
  • Loading branch information...
1 parent 8e53f44 commit eb7d5cd48a1de49a6edb85e160caf0012bfc0ca5 mbautin committed with Alex Feinberg Jul 24, 2012
Showing with 97 additions and 32 deletions.
  1. +9 −5 build.xml
  2. +28 −27 copy-hdfs-jars-to-maven.sh
  3. +60 −0 edit_generated_pom.py
View
14 build.xml
@@ -27,7 +27,7 @@
<property name="Name" value="Facebook's unified version of Apache Hadoop"/>
<property name="name" value="hadoop"/>
- <property name="version" value="0.20.1-dev"/>
+ <property name="version" value="0.20"/>
<property name="final.name" value="${name}-${version}"/>
<property name="year" value="2009"/>
@@ -203,15 +203,15 @@
<property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
<property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
<property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven" />
- <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/hadoop-core-${hadoop.version}.pom" />
- <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/hadoop-core-${hadoop.version}.jar" />
+ <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/hadoop-core-${version}.pom" />
+ <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/hadoop-core-${version}.jar" />
<!--this is the naming policy for artifacts we want pulled down-->
<property name="ivy.artifact.retrieve.pattern" value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
<!--this is how artifacts that get built are named-->
<property name="ivy.publish.pattern" value="hadoop-[revision]-core.[ext]"/>
- <property name="hadoop.jar" location="${build.dir}/hadoop-${hadoop.version}-core.jar" />
+ <property name="hadoop.jar" location="${build.dir}/hadoop-${version}-core.jar" />
<!-- jdiff.home property set -->
<property name="jdiff.home" value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
@@ -1945,7 +1945,7 @@
<ivy:publish
settingsRef="${ant.project.name}.ivy.settings"
resolver="local"
- pubrevision="${hadoop.version}"
+ pubrevision="${version}"
overwrite="true"
artifactspattern="${build.dir}/${ivy.publish.pattern}" />
</target>
@@ -2040,5 +2040,9 @@
</exec>
</target>
<!-- end of task-controller target -->
+
+ <target name="print-version">
+ <echo message="${version}" />
+ </target>
</project>
View
55 copy-hdfs-jars-to-maven.sh
@@ -5,59 +5,60 @@
# and test) built in titan/VENDOR/hadoop-0.20/.
#
+set -e -u -o pipefail
BASEDIR=`dirname $0`
cd ${BASEDIR}
-if [ ! -f build/hadoop-0.20.1-dev-core.jar ]; then
- if [ ! -f build/hadoop-0.20-core.jar ]; then
- echo "core jar not found. Running 'ant jar'..."
- ant jar | grep BUILD;
- fi
+VERSION=$( ant -q print-version | head -1 | awk '{print $2}' )
+if [ -z "$VERSION" ]; then
+ echo "Unable to determine Hadoop version" >&2
+ exit 1
fi
-if [ ! -f build/hadoop-0.20.1-dev-test.jar ]; then
- if [ ! -f build/hadoop-0.20-test.jar ]; then
- echo "test jar not found. Running 'ant jar-test'..."
- ant jar-test | grep BUILD;
- fi
+TARGETS=""
+
+CORE_JAR=build/hadoop-$VERSION-core.jar
+if [ ! -f $CORE_JAR ]; then
+ TARGETS="$TARGETS jar"
fi
+CORE_POM=build/ivy/maven/generated.pom
+if [ ! -f $CORE_POM ]; then
+ TARGETS="$TARGETS makepom"
+fi
-#
-# The names of core/test jar name depend
-# on whether they were generated using
-# build_all.sh script or just the vanilla
-# simple ant jar/jar-test
-#
-if [ -f build/hadoop-0.20.1-dev-core.jar ]; then
- CORE_JAR=build/hadoop-0.20.1-dev-core.jar
-else
- CORE_JAR=build/hadoop-0.20-core.jar
+TEST_JAR=build/hadoop-$VERSION-test.jar
+if [ ! -f $TEST_JAR ]; then
+ TARGETS="$TARGETS jar-test"
fi
-if [ -f build/hadoop-0.20.1-dev-test.jar ]; then
- TEST_JAR=build/hadoop-0.20.1-dev-test.jar
-else
- TEST_JAR=build/hadoop-0.20-test.jar
+if [ -n "$TARGETS" ]; then
+ ant $TARGETS
fi
+# Clear the optional flag on Hadoop dependencies so these dependencies can be
+# included transitively in other projects.
+CORE_POM_MODIFIED=$CORE_POM.new
+./edit_generated_pom.py >$CORE_POM_MODIFIED
+
echo "** Publishing hadoop* core & test jars "
echo "** to "
echo "** your local maven repo (~/.m2/repository). "
echo "** HBase builds will pick up the HDFS* jars from the local maven repo."
mvn install:install-file \
- -DgeneratePom=true \
+ -DpomFile=$CORE_POM_MODIFIED \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-core \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${CORE_JAR}
mvn install:install-file \
-DgeneratePom=true \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-test \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${TEST_JAR}
+
View
60 edit_generated_pom.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python

'''
Reads the automatically generated Hadoop pom file, removes the "optional"
flag from dependencies so that they can be included transitively into other
projects such as HBase, and removes certain dependencies that are not required
and could even break the code (e.g. an old version of xerces). Writes the
modified project object model XML to standard output.
'''

import re
import sys

from xml.dom.minidom import parse

# Artifacts that must not leak into dependent projects' transitive
# dependency graphs.
NON_TRANSITIVE_DEPS = [
    # Old version, breaks HBase
    'xerces',

    # Not used in production
    'checkstyle',
    'jdiff',

    # A release audit tool, probably not used in prod
    'rat-lib',
]

# Default location of the ivy-generated pom, relative to the Hadoop checkout
# (matches CORE_POM in copy-hdfs-jars-to-maven.sh).
POM_FILE = 'build/ivy/maven/generated.pom'


def artifact_id(dep):
    """Return the stripped <artifactId> text of a <dependency> node, or None."""
    for child in dep.childNodes:
        if (child.nodeName == 'artifactId' and
                child.firstChild and
                child.firstChild.nodeValue):
            return child.firstChild.nodeValue.strip()
    return None


def prune_dependencies(doc):
    """Drop blacklisted dependencies and clear the <optional> flag, in place.

    Clearing <optional> lets Maven propagate these dependencies transitively
    to projects (e.g. HBase) that depend on the published hadoop-core jar.
    """
    deps = doc.getElementsByTagName('dependencies')[0]
    # Snapshot the node list: we remove children while walking it.
    for dep in list(deps.getElementsByTagName('dependency')):
        if artifact_id(dep) in NON_TRANSITIVE_DEPS:
            deps.removeChild(dep)
            continue
        for opt in dep.getElementsByTagName('optional'):
            dep.removeChild(opt)


def render_pom(doc):
    """Serialize the DOM, dropping blank lines that toprettyxml() emits."""
    pretty = doc.toprettyxml(indent=' ' * 2)
    lines = [line.rstrip() for line in pretty.split('\n')]
    output = '\n'.join(line for line in lines if line)
    # toprettyxml() puts text content on its own line; re-join it so simple
    # values stay on one line: <element>value</element>
    return re.sub(
        r'(<([a-zA-Z]+)>)'
        r'\s*([^<>]+?)\s*'
        r'(</\2>)', r'\1\3\4', output)


def main():
    # Allow an explicit pom path (useful for testing); default to ivy output.
    pom_path = sys.argv[1] if len(sys.argv) > 1 else POM_FILE
    doc = parse(pom_path)
    prune_dependencies(doc)
    # print() with a single argument behaves identically on Python 2 and 3.
    print(render_pom(doc))


if __name__ == '__main__':
    main()

0 comments on commit eb7d5cd

Please sign in to comment.