
Updated the Hadoop tutorial, including adding support for cdh3u3.

git-svn-id: https://svn.apache.org/repos/asf/incubator/mesos/trunk@1332469 13f79535-47bb-0310-9956-ffa450edef68
commit 62418bea822952f0e385bfc5674387a6b911fcc1 (parent b56cd4f), committed by benh on Apr 30, 2012
Makefile.am
@@ -18,11 +18,11 @@ ACLOCAL_AMFLAGS = -I m4
AUTOMAKE_OPTIONS = foreign
-SUBDIRS = . third_party src
+SUBDIRS = . third_party src ec2 hadoop
EXTRA_DIST =
-PHONY_TARGETS = # Initialized to empty.
+PHONY_TARGETS =
# Since we generate several files in src/ with config.status, make
@@ -31,38 +31,14 @@ all-recursive: src/python/setup.py src/java/mesos.pom
# Standard stuff.
-EXTRA_DIST += bootstrap DISCLAIMER LICENSE NOTICE README support/colors.sh
+EXTRA_DIST += bootstrap LICENSE NOTICE README support/colors.sh
# Extra configure scripts.
EXTRA_DIST += configure.amazon-linux-64 configure.centos-5.4-64 \
configure.macosx configure.ubuntu-lucid-64 configure.ubuntu-natty-64
-# We include support for Hadoop on Mesos in the distribution.
-EXTRA_DIST += hadoop/TUTORIAL.sh hadoop/hadoop-0.20.205.0.patch \
- hadoop/hadoop-0.20.205.0.tar.gz \
- hadoop/hadoop-0.20.205.0_conf_hadoop-env.sh.patch \
- hadoop/hadoop-0.20.205.0_conf_mapred-site.xml.patch
-
-# Defines a target which runs the Hadoop tutorial to make sure
-# everything works. At some point we might want to do this
-# automagically (i.e., as part of 'make check'). Note that we set the
-# environment variable TMOUT to 1 so that each prompt in the tutorial
-# will return after 1 second so no interaction from the user is
-# required.
-hadoop: all
- @if test "$(top_srcdir)" != "$(top_builddir)"; then \
- rm -rf hadoop; \
- cp -rpf $(srcdir)/hadoop hadoop; \
- fi
- @TMOUT=1 JAVA_HOME=$(JAVA_HOME) ./hadoop/TUTORIAL.sh
-
-# TODO(benh): Cleanup (i.e., via 'clean-local') for hadoop target.
-
-PHONY_TARGETS += hadoop
-
-
if HAS_JAVA
maven-install:
@cd src && $(MAKE) $(AM_MAKEFLAGS) maven-install
@@ -71,47 +47,4 @@ PHONY_TARGETS += maven-install
endif
-# EC2 support.
-EXTRA_DIST += ec2/mesos-ec2 ec2/mesos_ec2.py
-
-EXTRA_DIST += \
- ec2/deploy.amazon64/root/ephemeral-hdfs/conf/core-site.xml \
- ec2/deploy.amazon64/root/ephemeral-hdfs/conf/hadoop-env.sh \
- ec2/deploy.amazon64/root/ephemeral-hdfs/conf/hdfs-site.xml \
- ec2/deploy.amazon64/root/ephemeral-hdfs/conf/mapred-site.xml \
- ec2/deploy.amazon64/root/ephemeral-hdfs/conf/masters \
- ec2/deploy.amazon64/root/ephemeral-hdfs/conf/slaves \
- ec2/deploy.amazon64/root/mesos-ec2/cluster-url \
- ec2/deploy.amazon64/root/mesos-ec2/copy-dir \
- ec2/deploy.amazon64/root/mesos-ec2/create-swap \
- ec2/deploy.amazon64/root/mesos-ec2/hadoop-framework-conf/core-site.xml \
- ec2/deploy.amazon64/root/mesos-ec2/hadoop-framework-conf/hadoop-env.sh \
- ec2/deploy.amazon64/root/mesos-ec2/hadoop-framework-conf/mapred-site.xml \
- ec2/deploy.amazon64/root/mesos-ec2/haproxy+apache/haproxy.config.template \
- ec2/deploy.amazon64/root/mesos-ec2/hypertable/Capfile \
- ec2/deploy.amazon64/root/mesos-ec2/hypertable/hypertable.cfg \
- ec2/deploy.amazon64/root/mesos-ec2/masters \
- ec2/deploy.amazon64/root/mesos-ec2/mesos-daemon \
- ec2/deploy.amazon64/root/mesos-ec2/redeploy-mesos \
- ec2/deploy.amazon64/root/mesos-ec2/setup \
- ec2/deploy.amazon64/root/mesos-ec2/setup-slave \
- ec2/deploy.amazon64/root/mesos-ec2/setup-torque \
- ec2/deploy.amazon64/root/mesos-ec2/slaves \
- ec2/deploy.amazon64/root/mesos-ec2/ssh-no-keychecking \
- ec2/deploy.amazon64/root/mesos-ec2/start-hypertable \
- ec2/deploy.amazon64/root/mesos-ec2/start-mesos \
- ec2/deploy.amazon64/root/mesos-ec2/stop-hypertable \
- ec2/deploy.amazon64/root/mesos-ec2/stop-mesos \
- ec2/deploy.amazon64/root/mesos-ec2/zoo \
- ec2/deploy.amazon64/root/persistent-hdfs/conf/core-site.xml \
- ec2/deploy.amazon64/root/persistent-hdfs/conf/hadoop-env.sh \
- ec2/deploy.amazon64/root/persistent-hdfs/conf/hdfs-site.xml \
- ec2/deploy.amazon64/root/persistent-hdfs/conf/mapred-site.xml \
- ec2/deploy.amazon64/root/persistent-hdfs/conf/masters \
- ec2/deploy.amazon64/root/persistent-hdfs/conf/slaves \
- ec2/deploy.amazon64/root/spark/conf/spark-env.sh
-
-EXTRA_DIST += ec2/deploy.generic/root/mesos-ec2/ec2-variables.sh
-
-
.PHONY: $(PHONY_TARGETS)
README
@@ -112,6 +112,11 @@ Hadoop
Included in the distribution is a runnable tutorial on using Hadoop on
Mesos (./hadoop/TUTORIAL.sh). Try it out!
+You can also "build" a self-contained distribution of Hadoop with the
+necessary Mesos components by doing 'make hadoop-0.20.205.0' or 'make
+hadoop-0.20.2-cdh3u3' from within [build]/hadoop (this uses the
+tutorial mentioned above).
+
Installing
==========
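
As a usage sketch of the new 'make hadoop-*' targets mentioned above (assuming an out-of-tree build directory named 'build'; adjust the path to match your own setup):

  $ cd build/hadoop
  $ make hadoop-0.20.205.0        # or: make hadoop-0.20.2-cdh3u3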
configure.ac
@@ -73,6 +73,8 @@ AC_CONFIG_COMMANDS_POST([ac_configure_args="$ac_configure_args_post"])
AC_CONFIG_SUBDIRS([third_party/libprocess])
AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([ec2/Makefile])
+AC_CONFIG_FILES([hadoop/Makefile])
AC_CONFIG_FILES([src/Makefile])
AC_CONFIG_FILES([third_party/Makefile])
ec2/Makefile.am
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+# EC2 support.
+EXTRA_DIST = mesos-ec2 mesos_ec2.py
+
+EXTRA_DIST += deploy.amazon64/root/ephemeral-hdfs/conf/core-site.xml \
+ deploy.amazon64/root/ephemeral-hdfs/conf/hadoop-env.sh \
+ deploy.amazon64/root/ephemeral-hdfs/conf/hdfs-site.xml \
+ deploy.amazon64/root/ephemeral-hdfs/conf/mapred-site.xml \
+ deploy.amazon64/root/ephemeral-hdfs/conf/masters \
+ deploy.amazon64/root/ephemeral-hdfs/conf/slaves \
+ deploy.amazon64/root/mesos-ec2/cluster-url \
+ deploy.amazon64/root/mesos-ec2/copy-dir \
+ deploy.amazon64/root/mesos-ec2/create-swap \
+ deploy.amazon64/root/mesos-ec2/hadoop-framework-conf/core-site.xml \
+ deploy.amazon64/root/mesos-ec2/hadoop-framework-conf/hadoop-env.sh \
+ deploy.amazon64/root/mesos-ec2/hadoop-framework-conf/mapred-site.xml \
+ deploy.amazon64/root/mesos-ec2/haproxy+apache/haproxy.config.template \
+ deploy.amazon64/root/mesos-ec2/hypertable/Capfile \
+ deploy.amazon64/root/mesos-ec2/hypertable/hypertable.cfg \
+ deploy.amazon64/root/mesos-ec2/masters \
+ deploy.amazon64/root/mesos-ec2/mesos-daemon \
+ deploy.amazon64/root/mesos-ec2/redeploy-mesos \
+ deploy.amazon64/root/mesos-ec2/setup \
+ deploy.amazon64/root/mesos-ec2/setup-slave \
+ deploy.amazon64/root/mesos-ec2/setup-torque \
+ deploy.amazon64/root/mesos-ec2/slaves \
+ deploy.amazon64/root/mesos-ec2/ssh-no-keychecking \
+ deploy.amazon64/root/mesos-ec2/start-hypertable \
+ deploy.amazon64/root/mesos-ec2/start-mesos \
+ deploy.amazon64/root/mesos-ec2/stop-hypertable \
+ deploy.amazon64/root/mesos-ec2/stop-mesos \
+ deploy.amazon64/root/mesos-ec2/zoo \
+ deploy.amazon64/root/persistent-hdfs/conf/core-site.xml \
+ deploy.amazon64/root/persistent-hdfs/conf/hadoop-env.sh \
+ deploy.amazon64/root/persistent-hdfs/conf/hdfs-site.xml \
+ deploy.amazon64/root/persistent-hdfs/conf/mapred-site.xml \
+ deploy.amazon64/root/persistent-hdfs/conf/masters \
+ deploy.amazon64/root/persistent-hdfs/conf/slaves \
+ deploy.amazon64/root/spark/conf/spark-env.sh
+
+EXTRA_DIST += deploy.generic/root/mesos-ec2/ec2-variables.sh
hadoop/Makefile.am
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+EXTRA_DIST = TUTORIAL.sh hadoop-0.20.2-cdh3u3.patch \
+ hadoop-0.20.2-cdh3u3_hadoop-env.sh.patch \
+ hadoop-0.20.2-cdh3u3_mesos.patch hadoop-0.20.205.0.patch \
+ hadoop-0.20.205.0_hadoop-env.sh.patch hadoop-0.20.205.0_mesos.patch \
+ mapred-site.xml.patch mesos-executor mesos/build.xml \
+ mesos/ivy/libraries.properties mesos/ivy.xml \
+ mesos/src/java/org/apache/hadoop/mapred/FrameworkExecutor.java \
+ mesos/src/java/org/apache/hadoop/mapred/FrameworkScheduler.java \
+ mesos/src/java/org/apache/hadoop/mapred/HadoopFrameworkMessage.java \
+ mesos/src/java/org/apache/hadoop/mapred/MesosScheduler.java \
+ mesos/src/java/org/apache/hadoop/mapred/MesosTaskTrackerInstrumentation.java
+
+# Defines some targets to run the Hadoop tutorial using a specified
+# distribution. At some point we might want to do this automagically
+# (i.e., as part of 'make check'). Note that we set the environment
+# variable TMOUT to 1 so that each prompt in the tutorial will return
+# after 1 second so no interaction from the user is required.
+hadoop-0.20.205.0:
+ if test "$(top_srcdir)" != "$(top_builddir)"; then \
+ cp -p $(srcdir)/TUTORIAL.sh .; \
+ cp -p $(srcdir)/hadoop-0.20.205.0.patch .; \
+ cp -p $(srcdir)/hadoop-0.20.205.0_hadoop-env.sh.patch .; \
+ cp -p $(srcdir)/hadoop-0.20.205.0_mesos.patch .; \
+ cp -p $(srcdir)/mapred-site.xml.patch .; \
+ cp -rp $(srcdir)/mesos .; \
+ cp -p $(srcdir)/mesos-executor .; \
+ fi
+ -rm -rf hadoop-0.20.205.0
+ @TMOUT=1 JAVA_HOME=$(JAVA_HOME) ./TUTORIAL.sh
+
+hadoop-0.20.2-cdh3u3:
+ if test "$(top_srcdir)" != "$(top_builddir)"; then \
+ cp -p $(srcdir)/TUTORIAL.sh .; \
+ cp -p $(srcdir)/hadoop-0.20.2-cdh3u3.patch .; \
+ cp -p $(srcdir)/hadoop-0.20.2-cdh3u3_hadoop-env.sh.patch .; \
+ cp -p $(srcdir)/hadoop-0.20.2-cdh3u3_mesos.patch .; \
+ cp -p $(srcdir)/mapred-site.xml.patch .; \
+ cp -rp $(srcdir)/mesos .; \
+ cp -p $(srcdir)/mesos-executor .; \
+ fi
+ -rm -rf hadoop-0.20.2-cdh3u3
+ @TMOUT=1 JAVA_HOME=$(JAVA_HOME) ./TUTORIAL.sh 0.20.2-cdh3u3
+
+
+clean-local:
+ -rm -rf hadoop-0.20.2-cdh3u3
+ -rm -f hadoop-0.20.2-cdh3u3.tar.gz
+ -rm -rf hadoop-0.20.205.0
+ -rm -f hadoop-0.20.205.0.tar.gz
+ -rm -f TUTORIAL.sh
+ -rm -f hadoop-0.20.2-cdh3u3.patch
+ -rm -f hadoop-0.20.2-cdh3u3_hadoop-env.sh.patch
+ -rm -f hadoop-0.20.2-cdh3u3_mesos.patch
+ -rm -f hadoop-0.20.205.0.patch
+ -rm -f hadoop-0.20.205.0_hadoop-env.sh.patch
+ -rm -f hadoop-0.20.205.0_mesos.patch
+ -rm -f mapred-site.xml.patch
+ -rm -rf mesos
+ -rm -f mesos-executor
+
+
+.PHONY: hadoop-0.20.205.0 hadoop-0.20.2-cdh3u3
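
As a rough usage note: each target above is approximately equivalent to running the tutorial by hand with a 1-second prompt timeout, for example (the JAVA_HOME value here is illustrative, not from this commit):

  $ TMOUT=1 JAVA_HOME=/usr/lib/jvm/java-6-openjdk ./TUTORIAL.sh 0.20.2-cdh3u3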
hadoop/TUTORIAL
@@ -1,139 +0,0 @@
-Welcome to the tutorial on running Apache Hadoop on top of Mesos!
-
-
-Consider running the interactive tutorial at TUTORIAL.sh instead.
- ^
- |
- |
- (try me) +-------------------+
-
-
-Patch the Hadoop distribution
-=============================
-
-We've included the 0.20.205.0 version of Hadoop in this directory
-(hadoop-0.20.205.0.tar.gz). Start by extracting it:
-
- $ tar zxvf hadoop-0.20.205.0.tar.gz
-
-To run Hadoop on Mesos we need to apply a rather minor patch. The
-patch makes a small number of modifications in Hadoop, and adds some
-new code at src/contrib/mesos. (Note that the changes to Hadoop have
-been committed in revisions r1033804 and r987589 so at some point we
-won't need to apply any patch at all.) We'll apply the patch with:
-
- $ patch -p2 <hadoop-0.20.205.0.patch
-
-We'll also need to make one of the new files executable via:
-
- $ chmod +x hadoop-0.20.205.0/bin/mesos-executor
-
-
-Build Hadoop
-============
-
-Okay, now let's change into the directory in order to build Hadoop.
-
- $ cd hadoop-0.20.205.0
-
-Okay, now we're ready to build and then run Hadoop! There are a couple
-important considerations. First, we need to locate the Mesos JAR and
-native library (i.e., libmesos.so on Linux and libmesos.dylib on Mac
-OS X). The Mesos JAR is used for both building and running, while the
-native library is only used for running. In addition, we need to
-locate the Protobuf JAR (if you don't already have one on your
-default classpath).
-
-This tutorial assumes you've built Mesos already. We'll use the
-environment variable MESOS_BUILD_DIR to denote this directory.
-
-Okay, let's try building Hadoop now! We need to let the build system
-know where the Mesos JAR is located by using the MESOS_JAR environment
-variable (i.e., MESOS_JAR=\${MESOS_BUILD_DIR}/src/mesos-x.y.z.jar). We
-can put it on the command line with 'ant' like this:
-
- $ MESOS_JAR=${MESOS_BUILD_DIR}/src/mesos-x.y.z.jar ant
-
-
-Configure Hadoop
-================
-
-Build success! Now let's run something!
-
-First we need to configure Hadoop appropriately by modifying
-conf/mapred-site.xml (as is always required when running Hadoop). In
-order to run Hadoop on Mesos we need to set at least these three
-properties:
-
- mapred.job.tracker
-
- mapred.jobtracker.taskScheduler
-
- mapred.mesos.master
-
-The 'mapred.job.tracker' property should be set to the host:port where
-you want to launch the JobTracker (e.g., localhost:54321).
-
-The 'mapred.jobtracker.taskScheduler' property must be set to
-'org.apache.hadoop.mapred.MesosScheduler'.
-
-If you've already got a Mesos master running you can use that for
-'mapred.mesos.master', but for this tutorial we'll just use 'local' in
-order to bring up a Mesos "cluster" within the process. To connect to
-a remote master simply use the Mesos URL used to connect the slave to
-the master (e.g., mesos://master@localhost:5050).
-
-We've got a prepared patch for conf/mapred-site.xml that makes the
-changes necessary to get everything running. We can apply that patch
-like so:
-
- $ patch -p3 <../hadoop-0.20.205.0_conf_mapred-site.xml.patch
-
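For illustration only, the patched conf/mapred-site.xml presumably ends up containing properties along these lines, using the example values discussed above (the actual patch contents are not shown in this commit):

  <configuration>
    <property>
      <name>mapred.job.tracker</name>
      <value>localhost:54321</value>
    </property>
    <property>
      <name>mapred.jobtracker.taskScheduler</name>
      <value>org.apache.hadoop.mapred.MesosScheduler</value>
    </property>
    <property>
      <name>mapred.mesos.master</name>
      <value>local</value>
    </property>
  </configuration>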
-
-Set up the runtime environment
-==============================
-
-Now in order to actually run Hadoop we need to set up our environment
-appropriately for Hadoop. We can do this in conf/hadoop-env.sh. This
-includes:
-
- (1) Setting JAVA_HOME (unnecessary if JAVA_HOME is set in your environment).
- (2) Adding the Mesos contrib class files to HADOOP_CLASSPATH.
- (3) Adding mesos-x.y.z.jar to the HADOOP_CLASSPATH.
- (4) Adding protobuf-2.3.0.jar to the HADOOP_CLASSPATH.
- (5) Setting MESOS_NATIVE_LIBRARY to point to the native library.
-
-We've got a prepared patch for conf/hadoop-env.sh that makes the
-necessary changes. We can apply that patch like so:
-
- $ patch -p3 <../hadoop-0.20.205.0_conf_hadoop-env.sh.patch
-
-(Note that this patch assumes MESOS_BUILD_DIR is '../..' and you'll
-need to specify that on the command line when you try to run the
-JobTracker if that's not the case ... don't worry, we'll remind you
-again later.)
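
As a hedged sketch of the kind of environment setup that patch makes in conf/hadoop-env.sh (the classpath and library paths below are assumptions for illustration; substitute your Mesos version for x.y.z):

  # Assumed default used by the patch; override if your build lives elsewhere.
  MESOS_BUILD_DIR=${MESOS_BUILD_DIR:-../..}
  # Mesos contrib classes, Mesos JAR, and Protobuf JAR (paths assumed).
  export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:build/contrib/mesos/classes
  export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${MESOS_BUILD_DIR}/src/mesos-x.y.z.jar
  export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${MESOS_BUILD_DIR}/protobuf-2.3.0.jar
  # libmesos.so on Linux, libmesos.dylib on Mac OS X (path assumed).
  export MESOS_NATIVE_LIBRARY=${MESOS_BUILD_DIR}/src/.libs/libmesos.so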
-
-
-Run a JobTracker
-================
-
-Let's go ahead and try to start the JobTracker via:
-
- $ ./bin/hadoop jobtracker
-
-Note that if you applied our conf/hadoop-env.sh patch we assume that
-MESOS_BUILD_DIR is located at '../..'. If this isn't the case (i.e.,
-you specified a different build directory than the default during this
-tutorial) then you'll need to set that variable either directly in
-conf/hadoop-env.sh or on the command line via:
-
- $ MESOS_BUILD_DIR=/path/to/mesos/build ./bin/hadoop jobtracker
-
-
-Run the wordcount example
-=========================
-
-Alright, now let's run the "wordcount" example via:
-
- $ ./bin/hadoop jar hadoop-examples-0.20.205.0.jar wordcount \
- src/contrib/mesos/src/java/org/apache/hadoop/mapred out