This repository has been archived by the owner on Jan 13, 2022. It is now read-only.

Commit 84dc665

Sync github production to FB HDFS cheetah branch

pritamdamania87 committed Sep 11, 2013
1 parent ef7609e commit 84dc665
Showing 1,460 changed files with 271,744 additions and 39,938 deletions.
30 changes: 16 additions & 14 deletions README.txt
@@ -7,18 +7,20 @@ This code is based on Apache Hadoop 0.20/Apache Hadoop 0.20-append.
FB-CHANGES.txt contains the additional patches that have been committed to
the original code base.

PLEASE NOTE:

* This distribution includes cryptographic software that
is subject to U.S. export control laws and applicable
export and import laws of other countries. BEFORE using
any software made available from this site, it is your
responsibility to understand and comply with these laws.
This software is being exported in accordance with the
Export Administration Regulations. As of June 2009, you
are prohibited from exporting and re-exporting this
software to Cuba, Iran, North Korea, Sudan, Syria and
any other countries specified by regulatory update to
the U.S. export control laws and regulations. Diversion
contrary to U.S. law is prohibited.
For instructions on starting a Corona cluster, see
https://github.com/facebook/hadoop-20/wiki/Corona-Single-Node-Setup.

PLEASE NOTE:

* This distribution includes cryptographic software that
is subject to U.S. export control laws and applicable
export and import laws of other countries. BEFORE using
any software made available from this site, it is your
responsibility to understand and comply with these laws.
This software is being exported in accordance with the
Export Administration Regulations. As of June 2009, you
are prohibited from exporting and re-exporting this
software to Cuba, Iran, North Korea, Sudan, Syria and
any other countries specified by regulatory update to
the U.S. export control laws and regulations. Diversion
contrary to U.S. law is prohibited.
100 changes: 73 additions & 27 deletions bin/hadoop
@@ -41,7 +41,12 @@
# HADOOP_ROOT_LOGGER The root appender. Default is INFO,console
#

bin=`dirname "$0"`
thisfile="${BASH_SOURCE[0]}"
if [ -L "${thisfile}" ]; then
  thisfile=$(readlink "${thisfile}")
fi

bin=`dirname "${thisfile}"`
bin=`cd "$bin"; pwd`

. "$bin"/hadoop-config.sh
@@ -72,6 +77,7 @@ if [ $# = 0 ]; then
  echo " jmxget get JMX exported values from NameNode or DataNode."
  echo " oiv apply the offline fsimage viewer to an fsimage"
  echo " oev apply the offline edits viewer to an edits file"
  echo " oid apply the offline fsimage decompressor to an fsimage"
  echo " Use -help to see options"
  echo " jobtracker run the MapReduce job Tracker node"
  echo " pipes run a Pipes job"
@@ -80,6 +86,7 @@ if [ $# = 0 ]; then
  echo " queue get information regarding JobQueues"
  echo " version print the version"
  echo " jar <jar> run a jar file"
  echo " onejar <jar> run a jar file packaged using one-jar, do not specify name of main class"
  echo " distcp <srcurl> <desturl> copy file or directories recursively"
  echo " fastcopy <src file> <dest file> copy files by maintaining optimal locality"
  echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
@@ -123,6 +130,7 @@ fi
# CLASSPATH initially contains $HADOOP_CONF_DIR
JMX_OPTS=""
CLASSPATH="${HADOOP_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$HADOOP_CLASSPATH
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
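
Since $HADOOP_CLASSPATH is now appended immediately after the configuration directory, user-supplied entries take precedence over the core and lib jars appended below. A hedged illustration (jar path hypothetical):

  export HADOOP_CLASSPATH=/home/alice/patched-hadoop-core.jar
  bin/hadoop version   # classes in the patched jar shadow the deployed core jar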

# for developers, add Hadoop classes to CLASSPATH
@@ -153,17 +161,6 @@ for f in $HADOOP_HOME/hadoop-*-core.jar; do
  CLASSPATH=${CLASSPATH}:$f;
done

# add libs to CLASSPATH
for f in $HADOOP_HOME/lib/*.jar; do
  CLASSPATH=${CLASSPATH}:$f;
done

if [ -d "$HADOOP_HOME/build/ivy/lib/Hadoop/common" ]; then
  for f in $HADOOP_HOME/build/ivy/lib/Hadoop/common/*.jar; do
    CLASSPATH=${CLASSPATH}:$f;
  done
fi

for f in $HADOOP_HOME/lib/jsp-2.1/*.jar; do
  CLASSPATH=${CLASSPATH}:$f;
done
@@ -175,12 +172,6 @@ for f in $HADOOP_HOME/build/hadoop-*-tools.jar; do
  TOOL_PATH=${TOOL_PATH}:$f;
done

# add user-specified CLASSPATH before corona so that a newer
# corona jar can be specified to override the deployed one
if [ "$HADOOP_CLASSPATH" != "" ]; then
  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
fi

# CORONA_PATH for corona daemons
if [ -d "$HADOOP_HOME/build/contrib/corona/classes" ]; then
  CORONA_PATH=${CORONA_PATH}:$HADOOP_HOME/build/contrib/corona/classes
@@ -190,14 +181,19 @@ for f in $HADOOP_HOME/contrib/corona/*.jar; do
  CORONA_PATH=${CORONA_PATH}:$f;
done

if [ "$CORONA_PATH" != "" ]; then
  CLASSPATH=${CLASSPATH}:${CORONA_PATH}
fi

for f in $HADOOP_HOME/contrib/corona/lib/*.jar; do
  CORONA_LIB_PATH=${CORONA_LIB_PATH}:$f;
done

# NOTIFIER_PATH for the namespace notifier server daemon
if [ -d "$HADOOP_HOME/build/contrib/namespace-notifier/classes" ]; then
  NOTIFIER_PATH=${NOTIFIER_PATH}:$HADOOP_HOME/build/contrib/namespace-notifier/classes
fi

for f in $HADOOP_HOME/contrib/namespace-notifier/*.jar; do
  NOTIFIER_PATH=${NOTIFIER_PATH}:$f;
done

# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_HOME/logs"
Expand Down Expand Up @@ -240,9 +236,25 @@ elif [ "$COMMAND" = "avatarnode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.AvatarNode'
  JMX_OPTS=$HADOOP_JMX_NAMENODE_OPTS
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_NAMENODE_OPTS"
elif [ "$COMMAND" = "journalnode" ] ; then
  CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
  JMX_OPTS=$HADOOP_JMX_JOURNAL_OPTS
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_JOURNAL_OPTS"
elif [ "$COMMAND" = "secondarynamenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
elif [ "$COMMAND" = "raidnode" ] ; then
  CLASS='org.apache.hadoop.raid.RaidNode'
  JMX_OPTS=$HADOOP_JMX_RAIDNODE_OPTS
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS"
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "notifier" ] ; then
  CLASS='org.apache.hadoop.hdfs.notifier.server.ServerCore'
  if [ "$NOTIFIER_PATH" != "" ]; then
    CLASSPATH=${CLASSPATH}:${NOTIFIER_PATH}
  fi
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $NOTIFIER_OPTS"
  JMX_OPTS="$JMX_OPTS $NOTIFIER_JMX_OPTS"
elif [ "$COMMAND" = "fsshellservice" ] ; then
  CLASS='org.apache.hadoop.hdfs.fsshellservice.FsShellServiceImpl'
  if [ -d "$HADOOP_HOME/build/contrib/corona/lib" ]; then
@@ -257,6 +269,19 @@ elif [ "$COMMAND" = "fsshellservice" ] ; then
    CLASSPATH=${CLASSPATH}:$f;
  done
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "bittorrent" ] ; then
CLASS='org.apache.hadoop.hdfs.bittorrent.BittorrentServiceImpl'
# Enable assertions for bittorrent
HADOOP_OPTS="$HADOOP_OPTS -ea:org.apache.hadoop.hdfs.bittorrent..."
if [ -d "$HADOOP_HOME/build/contrib/bittorrent/" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/bittorrent/classes
fi
elif [ "$COMMAND" = "bittorrentclient" ] ; then
CLASS='org.apache.hadoop.hdfs.bittorrent.BittorrentServiceClient'
if [ -d "$HADOOP_HOME/build/contrib/bittorrent/" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/bittorrent/classes
CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/bittorrent/test
fi
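
The -ea option added for the bittorrent service uses the JVM's package-prefix form: the trailing "..." is literal and enables assert statements in org.apache.hadoop.hdfs.bittorrent and all of its subpackages, leaving the rest of the runtime unaffected. An equivalent standalone invocation, for illustration:

  java -ea:org.apache.hadoop.hdfs.bittorrent... -cp "$CLASSPATH" \
    org.apache.hadoop.hdfs.bittorrent.BittorrentServiceImpl
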
elif [ "$COMMAND" = "avatardatanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.AvatarDataNode'
JMX_OPTS=$HADOOP_JMX_DATANODE_OPTS
Expand Down Expand Up @@ -295,6 +320,9 @@ elif [ "$COMMAND" = "raidfsck" ] ; then
elif [ "$COMMAND" = "raidshell" ] ; then
CLASS=org.apache.hadoop.raid.RaidShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "notifiershell" ] ; then
CLASS=org.apache.hadoop.hdfs.notifier.tools.NotifierShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
@@ -309,6 +337,9 @@ elif [ "$COMMAND" = "oiv" ] ; then
elif [ "$COMMAND" = "oev" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "oid" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageDecompressor
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.JMXGet
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -324,17 +355,26 @@ elif [ "$COMMAND" = "coronaclustermanager" ] ; then
  JMX_OPTS=$HADOOP_JMX_CORONACLUSTERMANAGER_OPTS
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONACLUSTERMANAGER_OPTS"
  # Corona lib path should be first to ensure that it uses the right thrift JAR
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
  CLASSPATH=${CORONA_LIB_PATH}:${CLUSTER_MANAGER_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronatasktracker" ] ; then
  CLASS=org.apache.hadoop.mapred.CoronaTaskTracker
  JMX_OPTS=$HADOOP_JMX_TASKTRACKER_OPTS
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_TASKTRACKER_OPTS"
  # For corona task trackers, the tasks should not get the thrift library.
  MAPREDUCE_TASK_SYSTEM_CLASSPATH=${CLASSPATH}
  export MAPREDUCE_TASK_SYSTEM_CLASSPATH
  # See coronaclustermanager comment
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaproxyjobtracker" ] ; then
  CLASS=org.apache.hadoop.mapred.ProxyJobTracker
  JMX_OPTS=$HADOOP_JMX_CORONAPROXYJOBTRACKER_OPTS
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONAPROXYJOBTRACKER_OPTS"
  # See coronaclustermanager comment
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaclient" ] ; then
  CLASS=org.apache.hadoop.corona.CoronaClient
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaadmin" ] ; then
  CLASS=org.apache.hadoop.corona.CoronaAdmin
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -350,9 +390,10 @@ elif [ "$COMMAND" = "tasktracker" ] ; then
elif [ "$COMMAND" = "multitasktracker" ] ; then
  CLASS=org.apache.hadoop.mapred.MultiTaskTracker
  HADOOP_ROOT_LOGGER=${HADOOP_TASKTRACKER_LOGGER:-$HADOOP_ROOT_LOGGER}
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_MULTITASKTRACKER_OPTS"
  # This should be the number of tasktrackers
  if [ -n "$MULTI_TT_SIZE" ] ; then
    CMDLINE_OPTS="$MULTI_TT_SIZE"
  if [ -n "$MULTI_TT_OPTIONS" ] ; then
    CMDLINE_OPTS="$MULTI_TT_OPTIONS"
  else
    CMDLINE_OPTS="1"
  fi
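
Per the comment above, the command-line argument for multitasktracker is the number of task trackers to run in one JVM; this commit renames the controlling variable from MULTI_TT_SIZE to MULTI_TT_OPTIONS, defaulting to 1. For example (assuming the value is still interpreted as a count, as the comment states):

  MULTI_TT_OPTIONS=4 bin/hadoop multitasktracker   # four in-process task trackers
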
@@ -366,6 +407,9 @@ elif [ "$COMMAND" = "pipes" ] ; then
elif [ "$COMMAND" = "version" ] ; then
  CLASS=org.apache.hadoop.util.VersionInfo
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "onejar" ] ; then
  CLASS=org.apache.hadoop.util.RunJar
  HADOOP_OPTS="$HADOOP_OPTS -Done-jar.jar.path=$1"
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
elif [ "$COMMAND" = "fastcopy" ] ; then
@@ -374,7 +418,7 @@ elif [ "$COMMAND" = "fastcopy" ] ; then
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "distcp" ] ; then
  CLASS=org.apache.hadoop.tools.DistCp
  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
  CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}:${TOOL_PATH}
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
  CLASS=org.apache.hadoop.log.LogLevel
@@ -463,4 +507,6 @@ if [ "$HADOOP_DEPLOYMENT" == "server" ]; then
fi

# run it
exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $JMX_OPTS -classpath "$CLASSPATH" $CLASS $CMDLINE_OPTS "$@"
export CLASSPATH
export JVM_PID=$$
exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $JMX_OPTS "-Dfb_hadoop_version=0.20" $CLASS $CMDLINE_OPTS "$@"
107 changes: 107 additions & 0 deletions bin/mount-hdfs.sh
@@ -0,0 +1,107 @@
#!/bin/bash

# Mount an HDFS uri into a local directory. Start up an instance of the proxy
# if there isn't one already running.

hdfs_uri=$1
local_dir=$2

START_PORT=34550
END_PORT=34700

# Needs to be root user to restart rpcidmapd. Will sudo as hadoop later
if [ $UID -ne 0 ]; then
  echo "Error: must run as root user"
  exit 1
fi

# Does hadoop user exist on system?
if ! /usr/bin/getent passwd hadoop >/dev/null 2>/dev/null; then
  echo "Error: hadoop user does not exist on system."
  exit 1
fi

# Find a free port to bind to between the start and end ports
# Return 0 if nothing was free in that range
find_free_port() {
  start=$1
  end=$2
  port=$start
  while true; do
    free=$(lsof -iTCP:$port | wc -l)
    if [ $free == "0" ]; then
      break
    fi
    port=$(( $port + 1))
    if [ $port -gt $end ]; then
      port=0
      break
    fi
  done
  echo $port
}

# Get a port of an existing NFS proxy. If there isn't one, return 0
get_existing_port() {
  running_pid=$(/usr/bin/pgrep -f org.apache.hadoop.hdfs.nfs.nfs4.NFS4Server)
  if [ $? != "0" ]; then
    echo "0"
    return
  fi

  if [ $(echo "${running_pid}" | wc -l) != "1" ]; then
    # More than one proxy. What's going on?
    exit 6
  fi

  port=$(/bin/awk -F'\0' '{ print $(NF-1) }' /proc/$running_pid/cmdline)
  if ! echo "${port}" | /bin/egrep -q '^[0-9]+$'; then
    # Command line looks weird. What's going on?
    exit 7
  fi

  echo ${port}
}

# Start up an instance of the proxy
start_proxy() {
  # Pick a free port to run on
  free_port=$(find_free_port $START_PORT $END_PORT)
  if [ $free_port -eq 0 ]; then
    echo "Error: could not find a free port"
    exit 4
  fi
  $(dirname ${BASH_SOURCE[0]})/start-nfs-server.sh $free_port >/dev/null 2>/dev/null
  sleep 5
  echo $free_port
}

if [ $# -ne 2 ]; then
  echo "Usage: $0 <hdfs uri> <directory>"
  echo
  echo " Mounts the HDFS location into the local directory"
  echo
  exit 1
fi

if ! echo $1 | /bin/egrep -q "^hdfs://[^:/]+:[0-9]+/.+$"; then
  echo "Error: HDFS URI '$hdfs_uri' is not valid"
  exit 2
fi

short_uri=$(echo "${hdfs_uri}" | sed -e 's/^hdfs:\/\/*//' -e 's/^\([^:]*\):\([0-9]*\)/\1.\2/')

if [ ! -d "${local_dir}" ]; then
echo "Error: Directory '${local_dir}' does not exist"
exit 3
fi

existing_port=$(get_existing_port)

if [ $existing_port == "0" ]; then
existing_port=$(start_proxy)
fi

/bin/mount -t nfs4 "localhost:/${short_uri}" "${local_dir}" -o rw,intr,port=${existing_port}

exit $?
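
Putting the pieces together, a typical invocation mounts an HDFS path through the NFS4 proxy, starting a proxy on a free port between 34550 and 34700 if none is already running (hostname and port hypothetical):

  sudo ./bin/mount-hdfs.sh hdfs://namenode.example.com:8020/user/data /mnt/hdfs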
5 changes: 3 additions & 2 deletions bin/start-corona.sh
@@ -24,7 +24,8 @@ bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh

# start corona daemons
# start clustermanager first to minimize connection errors at startup
"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
# run start-proxyjt.sh first so that the clustermanager can be started correctly
"$bin"/start-proxyjt.sh --config $HADOOP_CONF_DIR
sleep 1
"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
"$bin"/hadoop-daemons.sh --config $HADOOP_CONF_DIR start coronatasktracker
