Sync github production to FB HDFS cheetah branch

1 parent ef7609e commit 84dc6654cdea0846aa56caac4bb8da13d7c23f82 Pritam Damania committed Sep 11, 2013
Showing 1,460 changed files with 271,744 additions and 39,938 deletions.
@@ -7,18 +7,20 @@ This code is based on Apache Hadoop 0.20/Apache Hadoop 0.20-append.
FB-CHANGES.txt contains the additional patches that have been committed to
the original code base.
-PLEASE NOTE:
-
- * This distribution includes cryptographic software that
- is subject to U.S. export control laws and applicable
- export and import laws of other countries. BEFORE using
- any software made available from this site, it is your
- responsibility to understand and comply with these laws.
- This software is being exported in accordance with the
- Export Administration Regulations. As of June 2009, you
- are prohibited from exporting and re-exporting this
- software to Cuba, Iran, North Korea, Sudan, Syria and
- any other countries specified by regulatory update to
- the U.S. export control laws and regulations. Diversion
- contrary to U.S. law is prohibited.
+For instructions on starting a Corona cluster, see
+https://github.com/facebook/hadoop-20/wiki/Corona-Single-Node-Setup.
+PLEASE NOTE:
+
+ * This distribution includes cryptographic software that
+ is subject to U.S. export control laws and applicable
+ export and import laws of other countries. BEFORE using
+ any software made available from this site, it is your
+ responsibility to understand and comply with these laws.
+ This software is being exported in accordance with the
+ Export Administration Regulations. As of June 2009, you
+ are prohibited from exporting and re-exporting this
+ software to Cuba, Iran, North Korea, Sudan, Syria and
+ any other countries specified by regulatory update to
+ the U.S. export control laws and regulations. Diversion
+ contrary to U.S. law is prohibited.
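The linked wiki page carries the actual setup steps; as a rough sketch, with the scripts shipped in this tree (start-corona.sh is itself touched later in this commit), a single-node Corona cluster comes up along these lines, assuming the tree is built and the conf directory holds a corona-enabled configuration:

  # minimal single-node sketch; the conf layout is an assumption, see the wiki page
  export HADOOP_CONF_DIR=conf
  bin/start-corona.sh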
@@ -41,7 +41,12 @@
# HADOOP_ROOT_LOGGER The root appender. Default is INFO,console
#
-bin=`dirname "$0"`
+thisfile="${BASH_SOURCE[0]}"
+if [ -L "${thisfile}" ]; then
+ thisfile=$(readlink "${thisfile}")
+fi
+
+bin=`dirname "${thisfile}"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
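The BASH_SOURCE/readlink preamble above fixes invocation through a symlink: with the old dirname "$0", $0 pointed at the link itself, so hadoop-config.sh was sourced from the link's directory rather than the real install. Illustrative failure case (paths made up):

  ln -s /opt/hadoop/bin/hadoop /usr/local/bin/hadoop
  /usr/local/bin/hadoop version
  # old: bin resolves to /usr/local/bin, which has no hadoop-config.sh
  # new: readlink follows the link back to /opt/hadoop/bin/hadoop first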
@@ -72,6 +77,7 @@ if [ $# = 0 ]; then
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oev apply the offline edits viewer to an edits file"
+ echo " oid apply the offline fsimage decompressor to an fsimage"
echo " Use -help to see options"
echo " jobtracker run the MapReduce job Tracker node"
echo " pipes run a Pipes job"
@@ -80,6 +86,7 @@ if [ $# = 0 ]; then
echo " queue get information regarding JobQueues"
echo " version print the version"
echo " jar <jar> run a jar file"
+ echo " onejar <jar> run a jar file packaged using one-jar, do not specify name of main class"
echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " fastcopy <src file> <dest file> copy files by maintaining optimal locality"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
@@ -123,6 +130,7 @@ fi
# CLASSPATH initially contains $HADOOP_CONF_DIR
JMX_OPTS=""
CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$HADOOP_CLASSPATH
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
# for developers, add Hadoop classes to CLASSPATH
@@ -153,17 +161,6 @@ for f in $HADOOP_HOME/hadoop-*-core.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
-# add libs to CLASSPATH
-for f in $HADOOP_HOME/lib/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-if [ -d "$HADOOP_HOME/build/ivy/lib/Hadoop/common" ]; then
-for f in $HADOOP_HOME/build/ivy/lib/Hadoop/common/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-fi
-
for f in $HADOOP_HOME/lib/jsp-2.1/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
@@ -175,12 +172,6 @@ for f in $HADOOP_HOME/build/hadoop-*-tools.jar; do
TOOL_PATH=${TOOL_PATH}:$f;
done
-# add user-specified CLASSPATH before corona so that a newer
-# corona jar can be specified to override the deployed one
-if [ "$HADOOP_CLASSPATH" != "" ]; then
- CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
-fi
-
# CORONA_PATH for corona daemons
if [ -d "$HADOOP_HOME/build/contrib/corona/classes" ]; then
CORONA_PATH=${CORONA_PATH}:$HADOOP_HOME/build/contrib/corona/classes
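With the unconditional corona append gone, and HADOOP_CLASSPATH folded in right after $HADOOP_CONF_DIR in the earlier hunk, user-specified jars sit near the head of the classpath while corona jars are now added per-command further down. Hedged example (jar and job names are made up):

  # user jars now precede everything on the classpath except the conf dir;
  # the job jar's manifest is assumed to name its main class:
  HADOOP_CLASSPATH=/home/alice/extra-libs.jar bin/hadoop jar myjob.jar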
@@ -190,14 +181,19 @@ for f in $HADOOP_HOME/contrib/corona/*.jar; do
CORONA_PATH=${CORONA_PATH}:$f;
done
-if [ "$CORONA_PATH" != "" ]; then
- CLASSPATH=${CLASSPATH}:${CORONA_PATH}
-fi
-
for f in $HADOOP_HOME/contrib/corona/lib/*.jar; do
CORONA_LIB_PATH=${CORONA_LIB_PATH}:$f;
done
+# NOTIFIER_PATH for the namespace notifier server daemon
+if [ -d "$HADOOP_HOME/build/contrib/namespace-notifier/classes" ]; then
+ NOTIFIER_PATH=${NOTIFIER_PATH}:$HADOOP_HOME/build/contrib/namespace-notifier/classes
+fi
+
+for f in $HADOOP_HOME/contrib/namespace-notifier/*.jar; do
+ NOTIFIER_PATH=${NOTIFIER_PATH}:$f;
+done
+
# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_HOME/logs"
@@ -240,9 +236,25 @@ elif [ "$COMMAND" = "avatarnode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.AvatarNode'
JMX_OPTS=$HADOOP_JMX_NAMENODE_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_NAMENODE_OPTS"
+elif [ "$COMMAND" = "journalnode" ] ; then
+ CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
+ JMX_OPTS=$HADOOP_JMX_JOURNAL_OPTS
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_JOURNAL_OPTS"
elif [ "$COMMAND" = "secondarynamenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
+elif [ "$COMMAND" = "raidnode" ] ; then
+ CLASS='org.apache.hadoop.raid.RaidNode'
+ JMX_OPTS=$HADOOP_JMX_RAIDNODE_OPTS
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS"
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+elif [ "$COMMAND" = "notifier" ] ; then
+ CLASS='org.apache.hadoop.hdfs.notifier.server.ServerCore'
+ if [ "$NOTIFIER_PATH" != "" ]; then
+ CLASSPATH=${CLASSPATH}:${NOTIFIER_PATH}
+ fi
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $NOTIFIER_OPTS"
+ JMX_OPTS="$JMX_OPTS $NOTIFIER_JMX_OPTS"
elif [ "$COMMAND" = "fsshellservice" ] ; then
CLASS='org.apache.hadoop.hdfs.fsshellservice.FsShellServiceImpl'
if [ -d "$HADOOP_HOME/build/contrib/corona/lib" ]; then
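The new journalnode, raidnode, and notifier branches above only map a command name onto a server class plus its JVM and GC options; bringing a daemon up still goes through the standard wrappers (hadoop-daemon.sh usage is inferred from the start-corona.sh change later in this commit):

  # run the new QJM journal daemon in the background on this node:
  bin/hadoop-daemon.sh --config $HADOOP_CONF_DIR start journalnode
  # or in the foreground, directly through this script:
  bin/hadoop journalnode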
@@ -257,6 +269,19 @@ elif [ "$COMMAND" = "fsshellservice" ] ; then
CLASSPATH=${CLASSPATH}:$f;
done
CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+elif [ "$COMMAND" = "bittorrent" ] ; then
+ CLASS='org.apache.hadoop.hdfs.bittorrent.BittorrentServiceImpl'
+ # Enable assertions for bittorrent
+ HADOOP_OPTS="$HADOOP_OPTS -ea:org.apache.hadoop.hdfs.bittorrent..."
+ if [ -d "$HADOOP_HOME/build/contrib/bittorrent/" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/bittorrent/classes
+ fi
+elif [ "$COMMAND" = "bittorrentclient" ] ; then
+ CLASS='org.apache.hadoop.hdfs.bittorrent.BittorrentServiceClient'
+ if [ -d "$HADOOP_HOME/build/contrib/bittorrent/" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/bittorrent/classes
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/bittorrent/test
+ fi
elif [ "$COMMAND" = "avatardatanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.AvatarDataNode'
JMX_OPTS=$HADOOP_JMX_DATANODE_OPTS
@@ -295,6 +320,9 @@ elif [ "$COMMAND" = "raidfsck" ] ; then
elif [ "$COMMAND" = "raidshell" ] ; then
CLASS=org.apache.hadoop.raid.RaidShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "notifiershell" ] ; then
+ CLASS=org.apache.hadoop.hdfs.notifier.tools.NotifierShell
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
@@ -309,6 +337,9 @@ elif [ "$COMMAND" = "oiv" ] ; then
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "oid" ] ; then
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageDecompressor
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
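Like oiv and oev, the new oid command is a thin dispatch to an HDFS tool class, and this diff doesn't show the decompressor's own flags; the safest invocation is the one the usage message itself suggests:

  # per the "Use -help to see options" line in the usage text above:
  bin/hadoop oid -help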
@@ -324,17 +355,26 @@ elif [ "$COMMAND" = "coronaclustermanager" ] ; then
JMX_OPTS=$HADOOP_JMX_CORONACLUSTERMANAGER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONACLUSTERMANAGER_OPTS"
# Corona lib path should be first to ensure that it uses the right thrift JAR
- CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+ CLASSPATH=${CORONA_LIB_PATH}:${CLUSTER_MANAGER_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronatasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.CoronaTaskTracker
JMX_OPTS=$HADOOP_JMX_TASKTRACKER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_TASKTRACKER_OPTS"
+ # For corona task trackers, the tasks should not get the thrift library.
+ MAPREDUCE_TASK_SYSTEM_CLASSPATH=${CLASSPATH}
+ export MAPREDUCE_TASK_SYSTEM_CLASSPATH
# See coronaclustermanager comment
CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaproxyjobtracker" ] ; then
CLASS=org.apache.hadoop.mapred.ProxyJobTracker
JMX_OPTS=$HADOOP_JMX_CORONAPROXYJOBTRACKER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONAPROXYJOBTRACKER_OPTS"
+ # See coronaclustermanager comment
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+elif [ "$COMMAND" = "coronaclient" ] ; then
+ CLASS=org.apache.hadoop.corona.CoronaClient
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaadmin" ] ; then
CLASS=org.apache.hadoop.corona.CoronaAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -350,9 +390,10 @@ elif [ "$COMMAND" = "tasktracker" ] ; then
elif [ "$COMMAND" = "multitasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.MultiTaskTracker
HADOOP_ROOT_LOGGER=${HADOOP_TASKTRACKER_LOGGER:-$HADOOP_ROOT_LOGGER}
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_MULTITASKTRACKER_OPTS"
# This should be the number of tasktrackers
- if [ -n "$MULTI_TT_SIZE" ] ; then
- CMDLINE_OPTS="$MULTI_TT_SIZE"
+ if [ -n "$MULTI_TT_OPTIONS" ] ; then
+ CMDLINE_OPTS="$MULTI_TT_OPTIONS"
else
CMDLINE_OPTS="1"
fi
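The renamed MULTI_TT_OPTIONS is passed straight through as CMDLINE_OPTS, with a default of a single tracker when unset. Sketch (whether MultiTaskTracker accepts anything beyond a bare count is an assumption):

  # run four task trackers inside one JVM; unset falls back to 1:
  MULTI_TT_OPTIONS=4 bin/hadoop multitasktracker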
@@ -366,6 +407,9 @@ elif [ "$COMMAND" = "pipes" ] ; then
elif [ "$COMMAND" = "version" ] ; then
CLASS=org.apache.hadoop.util.VersionInfo
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "onejar" ] ; then
+ CLASS=org.apache.hadoop.util.RunJar
+ HADOOP_OPTS="$HADOOP_OPTS -Done-jar.jar.path=$1"
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
elif [ "$COMMAND" = "fastcopy" ] ; then
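The onejar branch reuses RunJar but hands the jar path to the one-jar bootstrap via -Done-jar.jar.path, which is why the usage text added earlier says not to name a main class: one-jar reads it from the bundled manifest. Hypothetical invocation (jar and argument names made up):

  # the first argument becomes -Done-jar.jar.path=...; the rest go to the app:
  bin/hadoop onejar build/myapp-standalone.jar input/ output/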
@@ -374,7 +418,7 @@ elif [ "$COMMAND" = "fastcopy" ] ; then
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
@@ -463,4 +507,6 @@ if [ "$HADOOP_DEPLOYMENT" == "server" ]; then
fi
# run it
-exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $JMX_OPTS -classpath "$CLASSPATH" $CLASS $CMDLINE_OPTS "$@"
+export CLASSPATH
+export JVM_PID=$$
+exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $JMX_OPTS "-Dfb_hadoop_version=0.20" $CLASS $CMDLINE_OPTS "$@"
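Note the exec line no longer passes -classpath: CLASSPATH is exported instead, and a JVM started without -cp/-classpath falls back to the CLASSPATH environment variable, so spawned child JVMs inherit it as well. JVM_PID records the wrapper's PID, which exec preserves. Standalone illustration of the fallback (paths made up):

  export CLASSPATH="build/classes:lib/example.jar"
  java org.apache.hadoop.util.VersionInfo   # no -cp flag; env CLASSPATH is used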
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+# Mount an HDFS uri into a local directory. Start up an instance of the proxy
+# if there isn't one already running.
+
+hdfs_uri=$1
+local_dir=$2
+
+START_PORT=34550
+END_PORT=34700
+
+# Needs to be root user to restart rpcidmapd. Will sudo as hadoop later
+if [ $UID -ne 0 ]; then
+ echo "Error: must run as root user"
+ exit 1
+fi
+
+# Does hadoop user exist on system?
+if ! /usr/bin/getent passwd hadoop >/dev/null 2>/dev/null; then
+ echo "Error: hadoop user does not exist on system."
+ exit 1
+fi
+
+# Find a free port to bind to between the start and end ports
+# Return 0 if nothing was free in that range
+find_free_port() {
+ start=$1
+ end=$2
+ port=$start
+ while true; do
+ free=$(lsof -iTCP:$port | wc -l)
+ if [ $free == "0" ]; then
+ break
+ fi
+ port=$(( $port + 1))
+ if [ $port -gt $end ]; then
+ port=0
+ break
+ fi
+ done
+ echo $port
+}
+
+# Get a port of an existing NFS proxy. If there isn't one, return 0
+get_existing_port() {
+ running_pid=$(/usr/bin/pgrep -f org.apache.hadoop.hdfs.nfs.nfs4.NFS4Server)
+ if [ $? != "0" ]; then
+ echo "0"
+ return
+ fi
+
+ if [ $(echo "${running_pid}" | wc -l) != "1" ]; then
+ # More than one proxy. What's going on?
+ exit 6
+ fi
+
+ port=$(/bin/awk -F'\0' '{ print $(NF-1) }' /proc/$running_pid/cmdline)
+ if ! echo "${port}" | /bin/egrep -q '^[0-9]+$'; then
+ # Command line looks weird. What's going on?
+ exit 7
+ fi
+
+ echo ${port}
+}
+
+# Start up an instance of the proxy
+start_proxy() {
+ # Pick a free port to run on
+ free_port=$(find_free_port $START_PORT $END_PORT)
+ if [ $free_port -eq 0 ]; then
+ echo "Error: could not find a free port"
+ exit 4
+ fi
+ $(dirname ${BASH_SOURCE[0]})/start-nfs-server.sh $free_port >/dev/null 2>/dev/null
+ sleep 5
+ echo $free_port
+}
+
+if [ $# -ne 2 ]; then
+ echo "Usage: $0 <hdfs uri> <directory>"
+ echo
+ echo " Mounts the HDFS location into the local directory"
+ echo
+ exit 1
+fi
+
+if ! echo $1 | /bin/egrep -q "^hdfs://[^:/]+:[0-9]+/.+$"; then
+ echo "Error: HDFS URI '$hdfs_uri' is not valid"
+ exit 2
+fi
+
+short_uri=$(echo "${hdfs_uri}" | sed -e 's/^hdfs:\/\/*//' -e 's/^\([^:]*\):\([0-9]*\)/\1.\2/')
+
+if [ ! -d "${local_dir}" ]; then
+ echo "Error: Directory '${local_dir}' does not exist"
+ exit 3
+fi
+
+existing_port=$(get_existing_port)
+
+if [ $existing_port == "0" ]; then
+ existing_port=$(start_proxy)
+fi
+
+/bin/mount -t nfs4 "localhost:/${short_uri}" "${local_dir}" -o rw,intr,port=${existing_port}
+
+exit $?
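The new file's name isn't shown in this view, so the one below is assumed. The URI must match ^hdfs://[^:/]+:[0-9]+/.+$ (explicit port, non-empty path), the target directory must already exist, and the script must run as root:

  # hypothetical invocation; script name and cluster address are illustrative:
  sudo ./mount-hdfs-nfs.sh hdfs://namenode.example.com:8020/user/alice /mnt/hdfs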
@@ -24,7 +24,8 @@ bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
# start corona daemons
-# start clustermanager first to minimize connection errors at startup
-"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
+# run start-proxyjt.sh first so that clustermanager can be started correctly
"$bin"/start-proxyjt.sh --config $HADOOP_CONF_DIR
+sleep 1
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
"$bin"/hadoop-daemons.sh --config $HADOOP_CONF_DIR start coronatasktracker