
Update master to the production release

Summary:
Updated master branch to the production release

Test Plan:

Reviewers:

CC:

Task ID: #

Blame Rev:
1 parent f324e66 · commit e0e27944aaea0e43978975274bdda90bc3087d2c · Alex Feinberg committed
Showing with 8,986 additions and 408 deletions.
  1. +2 −0 FB-CHANGES.txt
  2. +45 −6 bin/hadoop
  3. +15 −5 bin/hadoop-daemon.sh
  4. +1 −1 bin/start-corona.sh
  5. +31 −0 bin/start-fsshellservice.sh
  6. +42 −0 bin/start-multitasktracker.sh
  7. +31 −0 bin/stop-fsshellservice.sh
  8. +26 −0 bin/stop-multitasktracker.sh
  9. +1 −1 bin/stop-tasktracker.sh
  10. +62 −7 build.xml
  11. +1 −1 conf/log4j.properties
  12. +135 −0 conf/log4j.properties.scribeappender
  13. +38 −29 copy-hdfs-jars-to-maven.sh
  14. +60 −0 edit_generated_pom.py
  15. +17 −1 ivy.xml
  16. +5 −1 ivy/libraries.properties
  17. +117 −0 singleNodeHadoop/coronaConf/capacity-scheduler.xml
  18. +24 −0 singleNodeHadoop/coronaConf/configuration.xsl
  19. +13 −0 singleNodeHadoop/coronaConf/core-site.xml
  20. +10 −0 singleNodeHadoop/coronaConf/corona.xml
  21. +70 −0 singleNodeHadoop/coronaConf/hadoop-env.sh
  22. +28 −0 singleNodeHadoop/coronaConf/hadoop-metrics.properties
  23. +97 −0 singleNodeHadoop/coronaConf/hadoop-policy.xml
  24. +20 −0 singleNodeHadoop/coronaConf/hdfs-site.xml
  25. +115 −0 singleNodeHadoop/coronaConf/log4j.properties
  26. +31 −0 singleNodeHadoop/coronaConf/mapred-queue-acls.xml
  27. +127 −0 singleNodeHadoop/coronaConf/mapred-site.xml
  28. +1 −0 singleNodeHadoop/coronaConf/masters
  29. +1 −0 singleNodeHadoop/coronaConf/proxyjtnode
  30. +1 −0 singleNodeHadoop/coronaConf/slaves
  31. +57 −0 singleNodeHadoop/coronaConf/ssl-client.xml.example
  32. +55 −0 singleNodeHadoop/coronaConf/ssl-server.xml.example
  33. +4 −0 singleNodeHadoop/coronaConf/taskcontroller.cfg
  34. +117 −0 singleNodeHadoop/mapredConf/capacity-scheduler.xml
  35. +24 −0 singleNodeHadoop/mapredConf/configuration.xsl
  36. +23 −0 singleNodeHadoop/mapredConf/core-site.xml
  37. +10 −0 singleNodeHadoop/mapredConf/corona.xml
  38. +68 −0 singleNodeHadoop/mapredConf/hadoop-env.sh
  39. +20 −0 singleNodeHadoop/mapredConf/hadoop-metrics.properties
  40. +97 −0 singleNodeHadoop/mapredConf/hadoop-policy.xml
  41. +20 −0 singleNodeHadoop/mapredConf/hdfs-site.xml
  42. +115 −0 singleNodeHadoop/mapredConf/log4j.properties
  43. +31 −0 singleNodeHadoop/mapredConf/mapred-queue-acls.xml
  44. +95 −0 singleNodeHadoop/mapredConf/mapred-site.xml
  45. +1 −0 singleNodeHadoop/mapredConf/masters
  46. +17 −0 singleNodeHadoop/mapredConf/pools.xml
  47. +1 −0 singleNodeHadoop/mapredConf/proxyjtnode
  48. +1 −0 singleNodeHadoop/mapredConf/slaves
  49. +57 −0 singleNodeHadoop/mapredConf/ssl-client.xml.example
  50. +55 −0 singleNodeHadoop/mapredConf/ssl-server.xml.example
  51. +4 −0 singleNodeHadoop/mapredConf/taskcontroller.cfg
  52. +38 −0 singleNodeHadoop/singleNodeSwitch.sh
  53. +8 −0 src/contrib/benchmark/ivy.xml
  54. +2 −1 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/AllTestDriver.java
  55. +452 −0 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DFSGeneralTest.java
  56. +41 −0 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/GeneralConstant.java
  57. +2 −2 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/NNThroughputBenchmark.java
  58. +605 −0 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/DatanodeBenThread.java
  59. +79 −0 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenMapper.java
  60. +237 −0 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenReaderThread.java
  61. +137 −0 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenReduce.java
  62. +154 −0 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenThread.java
  63. +331 −0 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenWriterThread.java
  64. +13 −4 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/MultiTaskTracker.java
  65. +173 −17 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/SleepJobRunner.java
  66. +3 −0 src/contrib/build-contrib.xml
  67. +12 −1 src/contrib/build.xml
  68. +44 −2 src/contrib/corona/build.xml
  69. +64 −12 src/contrib/corona/interface/ClusterManager.thrift
  70. +19 −2 src/contrib/corona/ivy.xml
  71. +6 −0 src/contrib/corona/ivy/libraries.properties
  72. +1,533 −23 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ClusterManagerService.java
  73. +96 −48 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ClusterNodeInfo.java
  74. +322 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/DisallowedNode.java
  75. +949 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/NodeUsageReport.java
  76. +26 −10 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ResourceGrant.java
  77. +42 −26 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ResourceRequest.java
  78. +59 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ResourceType.java
  79. +682 −18 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionDriverService.java
  80. +95 −10 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionInfo.java
  81. +94 −2 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionRegistrationData.java
  82. +6 −1 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionStatus.java
  83. +59 −7 src/contrib/corona/src/java/org/apache/hadoop/corona/BinarySearcher.java
  84. +209 −74 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManager.java
  85. +228 −50 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManagerMetrics.java
  86. +1 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManagerServer.java
  87. +126 −46 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterNode.java
Note: the complete diff could not be displayed because too many files (615) changed; only the first 87 files are listed above, and only a subset of their diffs appears below.
2 FB-CHANGES.txt
@@ -230,3 +230,5 @@ Release 0.20.3 + FB - Unreleased.
HDFS-955 Fix Edits log/Save FSImage bugs
HADOOP-6683 the first optimization: ZlibCompressor does not fully utilize the buffer
HADOOP-7111 Several TFile tests failing when native libraries are present
+ HADOOP-7444 Add Checksum API to verify and calculate checksums "in bulk" (todd)
+ HADOOP-7443 Add CRC32C as another DataChecksum implementation (todd)
51 bin/hadoop
@@ -61,11 +61,14 @@ if [ $# = 0 ]; then
echo " datanode run a DFS datanode"
echo " dfsadmin run a DFS admin client"
echo " mradmin run a Map-Reduce admin client"
+ echo " coronaadmin run a Corona admin client"
echo " fsck run a DFS filesystem checking utility"
+ echo " avatarfsck run a avatar DFS filesystem checking utility"
echo " raidfsck [path] run RAID-aware filesystem checking utility"
echo " raidshell [options] run RAID-shell utility"
echo " fs run a generic filesystem user client"
echo " balancer run a cluster balancing utility"
+ echo " avatarbalancer run a avatar cluster balancing utility"
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oev apply the offline edits viewer to an edits file"
@@ -155,10 +158,6 @@ for f in $HADOOP_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
-for f in $HADOOP_HOME/lib/logger/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
if [ -d "$HADOOP_HOME/build/ivy/lib/Hadoop/common" ]; then
for f in $HADOOP_HOME/build/ivy/lib/Hadoop/common/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
@@ -229,9 +228,13 @@ if [ "$COMMAND" = "namenode" ] ; then
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_NAMENODE_OPTS"
elif [ "$COMMAND" = "avatarshell" ] ; then
CLASS='org.apache.hadoop.hdfs.AvatarShell'
+ HADOOP_LOGFILE='avatarshell.log'
+ HADOOP_ROOT_LOGGER=INFO,DRFA
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "avatarzk" ] ; then
CLASS='org.apache.hadoop.hdfs.AvatarZKShell'
+ HADOOP_LOGFILE='avatarzkshell.log'
+ HADOOP_ROOT_LOGGER=INFO,DRFA
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "avatarnode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.AvatarNode'
@@ -240,6 +243,20 @@ elif [ "$COMMAND" = "avatarnode" ] ; then
elif [ "$COMMAND" = "secondarynamenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
+elif [ "$COMMAND" = "fsshellservice" ] ; then
+ CLASS='org.apache.hadoop.hdfs.fsshellservice.FsShellServiceImpl'
+ if [ -d "$HADOOP_HOME/build/contrib/corona/lib" ]; then
+ for f in $HADOOP_HOME/build/contrib/corona/lib/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+ done
+ fi
+ if [ -d "$HADOOP_HOME/build/contrib/fsshellservice/" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/fsshellservice/classes
+ fi
+ for f in $HADOOP_HOME/contrib/fsshellservice/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+ done
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "avatardatanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.AvatarDataNode'
JMX_OPTS=$HADOOP_JMX_DATANODE_OPTS
@@ -268,6 +285,9 @@ elif [ "$COMMAND" = "version" ] ; then
elif [ "$COMMAND" = "fsck" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSck
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "avatarfsck" ] ; then
+ CLASS=org.apache.hadoop.hdfs.tools.AvatarDFSck
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "raidfsck" ] ; then
CLASS=org.apache.hadoop.raid.RaidShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -279,6 +299,10 @@ elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
CMDLINE_OPTS="$CMDLINE_OPTS $BALANCER_CMDLINE_OPTS"
+elif [ "$COMMAND" = "avatarbalancer" ] ; then
+ CLASS=org.apache.hadoop.hdfs.server.balancer.AvatarBalancer
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
+ CMDLINE_OPTS="$CMDLINE_OPTS $BALANCER_CMDLINE_OPTS"
elif [ "$COMMAND" = "oiv" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -299,16 +323,22 @@ elif [ "$COMMAND" = "coronaclustermanager" ] ; then
CLASS=org.apache.hadoop.corona.ClusterManagerServer
JMX_OPTS=$HADOOP_JMX_CORONACLUSTERMANAGER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONACLUSTERMANAGER_OPTS"
- CLASSPATH=${CLASSPATH}:${CORONA_LIB_PATH}
+ # Corona lib path should be first to ensure that it uses the right thrift JAR
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronatasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.CoronaTaskTracker
JMX_OPTS=$HADOOP_JMX_TASKTRACKER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_TASKTRACKER_OPTS"
- CLASSPATH=${CLASSPATH}:${CORONA_LIB_PATH}
+ # See coronaclustermanager comment
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaproxyjobtracker" ] ; then
CLASS=org.apache.hadoop.mapred.ProxyJobTracker
JMX_OPTS=$HADOOP_JMX_CORONAPROXYJOBTRACKER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONAPROXYJOBTRACKER_OPTS"
+elif [ "$COMMAND" = "coronaadmin" ] ; then
+ CLASS=org.apache.hadoop.corona.CoronaAdmin
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "tasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.TaskTracker
JMX_OPTS=$HADOOP_JMX_TASKTRACKER_OPTS
@@ -317,6 +347,15 @@ elif [ "$COMMAND" = "tasktracker" ] ; then
if [ -n "$HADOOP_INSTANCE" ] ; then
CMDLINE_OPTS="-instance $HADOOP_INSTANCE $CMDLINE_OPTS"
fi
+elif [ "$COMMAND" = "multitasktracker" ] ; then
+ CLASS=org.apache.hadoop.mapred.MultiTaskTracker
+ HADOOP_ROOT_LOGGER=${HADOOP_TASKTRACKER_LOGGER:-$HADOOP_ROOT_LOGGER}
+ # This should be the number of tasktrackers
+ if [ -n "$MULTI_TT_SIZE" ] ; then
+ CMDLINE_OPTS="$MULTI_TT_SIZE"
+ else
+ CMDLINE_OPTS="1"
+ fi
elif [ "$COMMAND" = "job" ] ; then
CLASS=org.apache.hadoop.mapred.JobClient
elif [ "$COMMAND" = "queue" ] ; then
20 bin/hadoop-daemon.sh
@@ -95,6 +95,7 @@ fi
# some variables
export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_ROOT_LOGGER="INFO,DRFA"
+jps_cmd="$JAVA_HOME/bin/jps"
log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
gc_log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-gc.log
@@ -116,13 +117,22 @@ case $startStop in
# signals. It's possible that the pid in our pidfile is now a thread
# owned by another process. Let's check to make sure our pid is
# actually a running process.
- ps -e -o pid | egrep "^`cat $pid`$" >/dev/null 2>&1
+ ps -e -o pid | egrep "^[[:space:]]*`cat $pid`$" >/dev/null 2>&1
if [ $? -eq 0 ]; then
- echo $command running as process `cat $pid`. Stop it first.
- exit 1
- else
- rm $pid
+ # If the pid is from a JVM process of the same type, then we need
+ # to abort. If not, then we can clean up the pid file and carry on.
+ type_of_pid="$(set -o pipefail; $jps_cmd | awk /^$(cat $pid)/'{print tolower($2)}')"
+ if [ $? -ne 0 ]; then
+ echo "$jps_cmd failed. Running process `cat $pid` might be"\
+ "a $command process. Please investigate."
+ exit 1
+ fi
+ if [ "$command" == "$type_of_pid" ]; then
+ echo $command running as process `cat $pid`. Stop it first.
+ exit 1
+ fi
fi
+ rm $pid
fi
fi
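Read in isolation, the new pid check amounts to the sketch below (the daemon name and pid-file path are illustrative, and the handling of a failing jps call is omitted). ps pads pids with leading spaces, which is why the pattern gains [[:space:]]*, and jps reports each JVM's main class, so a recycled pid belonging to an unrelated process no longer blocks startup:

    pid_file=/var/hadoop/pids/hadoop-hdfs-datanode.pid
    command=datanode
    jps_cmd="$JAVA_HOME/bin/jps"

    if ps -e -o pid | egrep "^[[:space:]]*$(cat "$pid_file")$" >/dev/null 2>&1; then
      # jps prints "<pid> <MainClass>"; lower-case the class and compare it
      # against the daemon command to decide whether the pid is really ours.
      type_of_pid="$($jps_cmd | awk -v p="$(cat "$pid_file")" '$1 == p {print tolower($2)}')"
      if [ "$command" = "$type_of_pid" ]; then
        echo "$command already running as process $(cat "$pid_file")."
      else
        rm "$pid_file"   # stale pid file left by some other process
      fi
    fi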
2 bin/start-corona.sh
@@ -26,5 +26,5 @@ bin=`cd "$bin"; pwd`
# start corona daemons
# start clustermanager first to minimize connection errors at startup
"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
-"$bin"/start-proxyjt-remote.sh --config $HADOOP_CONF_DIR
+"$bin"/start-proxyjt.sh --config $HADOOP_CONF_DIR
"$bin"/hadoop-daemons.sh --config $HADOOP_CONF_DIR start coronatasktracker
31 bin/start-fsshellservice.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+usage="Usage: start-fsshellservice.sh"
+
+params=$#
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# get arguments
+if [ $# -ge 1 ]; then
+ echo $usage
+fi
+
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start fsshellservice
42 bin/start-multitasktracker.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Start hadoop map reduce daemons. Run this on the local machine. By default
+# logs are written to /tmp/hadoop/
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# Add contrib jars to classpath. Needed for FairScheduler
+for f in "$bin"/../build/contrib/*/*.jar; do
+ echo "Adding $f to classpath"
+ export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$f;
+done
+
+export HADOOP_OPTS="$HADOOP_OPTS -Dcom.sun.management.jmxremote
+-Dcom.sun.management.jmxremote.port=8697 \
+-Dcom.sun.management.jmxremote.authenticate=false
+-Dcom.sun.management.jmxremote.ssl=false \
+-verbose:gc -XX:+PrintGCDateStamps -XX:+PrintGCDetails \
+-Xloggc:/usr/local/hadoop/logs/MRSIM/multitasktracker.gc.log \
+-XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC"
+# start mapred daemons
+# start jobtracker first to minimize connection errors at startup
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start multitasktracker
31 bin/stop-fsshellservice.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+usage="Usage: stop-fsshellservice.sh"
+
+params=$#
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# get arguments
+if [ $# -ge 1 ]; then
+ echo $usage
+fi
+
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop fsshellservice
26 bin/stop-multitasktracker.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop hadoop map reduce daemons. Run this on master node.
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop multitasktracker
2 bin/stop-tasktracker.sh
@@ -43,7 +43,7 @@ if [ -f $pid ]; then
pidvalue=$(cat $pid)
for i in `ps -o pid --no-headers --ppid $pidvalue`; do
echo "Killing process group $i"
- kill -- -$i;
+ kill -s 9 -- -$i;
done
fi
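The change escalates from the default SIGTERM to SIGKILL. A minimal sketch of the idiom (the pid is hypothetical): the -- ends option parsing so the negative argument is read as a process-group id, and -s 9 kills every process in that group:

    pgid=12345
    kill -s 9 -- "-$pgid"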
69 build.xml
@@ -27,7 +27,7 @@
<property name="Name" value="Facebook's unified version of Apache Hadoop"/>
<property name="name" value="hadoop"/>
- <property name="version" value="0.20.1-dev"/>
+ <property name="version" value="0.20"/>
<property name="final.name" value="${name}-${version}"/>
<property name="year" value="2009"/>
@@ -132,7 +132,9 @@
<property name="javac.deprecation" value="off"/>
<property name="javac.version" value="1.6"/>
<property name="javac.args" value=""/>
- <property name="javac.args.warnings" value="-Xlint:unchecked"/>
+ <!-- NOTE: -XDignore.symbol.file removes annoying warning about use
+ of proprietary sun.misc.Unsafe -->
+ <property name="javac.args.warnings" value="-Xlint:unchecked -XDignore.symbol.file"/>
<property name="clover.db.dir" location="${build.dir}/test/clover/db"/>
<property name="clover.report.dir" location="${build.dir}/test/clover/reports"/>
@@ -201,15 +203,15 @@
<property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
<property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
<property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven" />
- <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/hadoop-core-${hadoop.version}.pom" />
- <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/hadoop-core-${hadoop.version}.jar" />
+ <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/hadoop-core-${version}.pom" />
+ <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/hadoop-core-${version}.jar" />
<!--this is the naming policy for artifacts we want pulled down-->
<property name="ivy.artifact.retrieve.pattern" value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
<!--this is how artifacts that get built are named-->
<property name="ivy.publish.pattern" value="hadoop-[revision]-core.[ext]"/>
- <property name="hadoop.jar" location="${build.dir}/hadoop-${hadoop.version}-core.jar" />
+ <property name="hadoop.jar" location="${build.dir}/hadoop-${version}-core.jar" />
<!-- jdiff.home property set -->
<property name="jdiff.home" value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
@@ -342,6 +344,7 @@
<target name="compile-rcc-compiler" depends="init, record-parser">
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${core.src.dir}"
includes="org/apache/hadoop/record/compiler/**/*.java"
@@ -366,6 +369,7 @@
</taskdef>
<!-- Compile Java files (excluding JSPs) checking warnings -->
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${core.src.dir}"
includes="org/apache/hadoop/**/*.java"
@@ -403,6 +407,7 @@
<!-- Compile Java files (excluding JSPs) checking warnings -->
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${mapred.src.dir};${build.src}"
includes="org/apache/hadoop/**/*.java"
@@ -439,6 +444,7 @@
<!-- Compile Java files (excluding JSPs) checking warnings -->
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${hdfs.src.dir};${build.src}"
includes="org/apache/hadoop/**/*.java"
@@ -460,6 +466,7 @@
<target name="compile-tools" depends="init">
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${tools.src}"
includes="org/apache/hadoop/**/*.java"
@@ -494,6 +501,8 @@
<mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/zlib"/>
<mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/lzma"/>
<mkdir dir="${build.native}/src/org/apache/hadoop/syscall"/>
+ <mkdir dir="${build.native}/src/org/apache/hadoop/util"/>
+ <mkdir dir="${build.native}/src/org/apache/hadoop/io/nativeio"/>
<javah
classpath="${build.classes}"
@@ -515,6 +524,17 @@
<class name="org.apache.hadoop.io.compress.lzma.LzmaDecompressor" />
</javah>
+ <echo> Running javah NativeIO.java </echo>
+ <javah
+ classpath="${build.classes}"
+ destdir="${build.native}/src/org/apache/hadoop/io/nativeio"
+ force="yes"
+ verbose="yes"
+ >
+ <class name="org.apache.hadoop.io.nativeio.NativeIO" />
+ </javah>
+
+ <echo> Running javah LinuxSystemCall.java </echo>
<javah
classpath="${build.classes}"
destdir="${build.native}/src/org/apache/hadoop/syscall"
@@ -524,6 +544,16 @@
<class name="org.apache.hadoop.syscall.LinuxSystemCall" />
</javah>
+ <echo> Running javah bulk_crc32.java </echo>
+ <javah
+ classpath="${build.classes}"
+ destdir="${build.native}/src/org/apache/hadoop/util"
+ force="yes"
+ verbose="yes"
+ >
+ <class name="org.apache.hadoop.util.NativeCrc32" />
+ </javah>
+
<exec dir="${build.native}" executable="sh" failonerror="true">
<env key="OS_NAME" value="${os.name}"/>
<env key="OS_ARCH" value="${os.arch}"/>
@@ -569,6 +599,7 @@
<target name="compile-examples"
depends="compile-core,compile-tools,compile-c++-examples">
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${examples.dir}"
includes="org/apache/hadoop/**/*.java"
@@ -662,6 +693,7 @@
<!-- ================================================================== -->
<target name="compile-core-test" depends="compile-examples, compile-tools, generate-test-records">
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${test.generated.dir}"
includes="org/apache/hadoop/**/*.java"
@@ -675,6 +707,7 @@
<classpath refid="test.classpath"/>
</javac>
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${test.src.dir}"
includes="org/apache/hadoop/**/*.java"
@@ -688,6 +721,7 @@
<classpath refid="test.classpath"/>
</javac>
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${test.src.dir}/testjar"
includes="*.java"
@@ -705,6 +739,7 @@
basedir="${test.build.testjar}">
</jar>
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${test.src.dir}/testshell"
includes="*.java"
@@ -870,7 +905,13 @@
</subant>
</target>
- <target name="test" depends="test-core, test-contrib" description="Run core, contrib unit tests">
+ <target name="test" depends="clean, compile" description="Run core, contrib unit tests">
+ <subant target="test-core" failonerror="false">
+ <fileset file="build.xml"/>
+ </subant>
+ <subant target="test-contrib">
+ <fileset file="build.xml"/>
+ </subant>
</target>
<!-- Run all unit tests, not just Test*, and use non-test configuration. -->
@@ -1226,6 +1267,7 @@
<copy todir="${dist.dir}/" file="build.xml"/>
+ <chmod perm="ugo+x" file="${dist.dir}/src/native/configure"/>
<chmod perm="ugo+x" type="file" parallel="false">
<fileset dir="${dist.dir}/bin"/>
<fileset dir="${dist.dir}/src/contrib/">
@@ -1250,6 +1292,7 @@
<exclude name="${final.name}/contrib/*/bin/*" />
<exclude name="${final.name}/src/contrib/ec2/bin/*" />
<exclude name="${final.name}/src/contrib/ec2/bin/image/*" />
+ <exclude name="${final.name}/src/native/configure" />
<include name="${final.name}/**" />
</tarfileset>
<tarfileset dir="${build.dir}" mode="755">
@@ -1257,6 +1300,7 @@
<include name="${final.name}/contrib/*/bin/*" />
<include name="${final.name}/src/contrib/ec2/bin/*" />
<include name="${final.name}/src/contrib/ec2/bin/image/*" />
+ <include name="${final.name}/src/native/configure" />
</tarfileset>
</param.listofitems>
</macro_tar>
@@ -1334,6 +1378,11 @@
</target>
<target name="binary" depends="bin-package" description="Make tarball without source and documentation">
+ <copy todir="${build.dir}/${final.name}" includeEmptyDirs="false">
+ <fileset dir="${build.dir}/ivy/maven/">
+ <include name="generated.pom"/>
+ </fileset>
+ </copy>
<macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
<param.listofitems>
<tarfileset dir="${build.dir}" mode="664">
@@ -1344,6 +1393,7 @@
</tarfileset>
<tarfileset dir="${build.dir}" mode="755">
<include name="${final.name}/bin/*" />
+ <include name="${final.name}/generated.pom"/>
</tarfileset>
</param.listofitems>
</macro_tar>
@@ -1584,6 +1634,7 @@
<target name="compile-ant-tasks" depends="compile-core">
<javac
+ includeantruntime="true"
encoding="${build.encoding}"
srcdir="${anttasks.dir}"
includes="org/apache/hadoop/ant/**/*.java"
@@ -1866,7 +1917,7 @@
<ivy:publish
settingsRef="${ant.project.name}.ivy.settings"
resolver="local"
- pubrevision="${hadoop.version}"
+ pubrevision="${version}"
overwrite="true"
artifactspattern="${build.dir}/${ivy.publish.pattern}" />
</target>
@@ -1961,5 +2012,9 @@
</exec>
</target>
<!-- end of task-controller target -->
+
+ <target name="print-version">
+ <echo message="${version}" />
+ </target>
</project>
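The new print-version target gives shell scripts a single source of truth for the build version; copy-hdfs-jars-to-maven.sh further down uses exactly this pattern:

    VERSION=$(ant -q print-version | head -1 | awk '{print $2}')
    echo "will publish hadoop-$VERSION-core.jar"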
2 conf/log4j.properties
@@ -34,7 +34,7 @@ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Pattern format: Date LogLevel LoggerName LogMessage
-log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
# Debugging Pattern format
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
135 conf/log4j.properties.scribeappender
@@ -0,0 +1,135 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+#log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# Rollover at the top of every hour
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd-HH
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# logmonitor understood
+# This format is the one that logmonitor can understand. It is heavyweight so
+# should only be used for WARN and above
+#
+
+log4j.appender.LM=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.LM.threshold=WARN
+log4j.appender.LM.File=${hadoop.log.dir}/logmonitor-${hadoop.log.file}
+log4j.appender.LM.DatePattern=.yyyy-MM-dd-HH
+log4j.appender.LM.layout=org.apache.log4j.PatternLayout
+log4j.appender.LM.layout.ConversionPattern=[%c{3},%L] [%d{EEE MMM dd HH:mm:ss yyyy}] %p: %m%n
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.hdfs.server.FSNamesystem.audit=INFO
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
+
+# Special appender for RAID metrics.
+log4j.logger.RaidMetrics=INFO,SCRIBE_RAID_METRICS_APPENDER
+
+# RaidMetrics
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER=com.facebook.logging.ScribeAppender
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER.tag=${hadoop.tasklog.taskid}
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER.application=raid
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER.installation=${hadoop.installationid}
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER.layout=org.apache.log4j.PatternLayout
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.SCRIBE_RAID_METRICS_APPENDER.Threshold=INFO
+
+#
+# This is a scribe appender. The data will be sent directly to scribe
+#
+#
+log4j.appender.scribe=com.facebook.logging.ScribeAppender
+log4j.appender.scribe.tag=${hadoop.tasklog.taskid}
+log4j.appender.scribe.application=${hadoop.application}
+log4j.appender.scribe.installation=${hadoop.installationid}
+log4j.appender.scribe.layout=org.apache.log4j.PatternLayout
+log4j.appender.scribe.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.scribe.threshold=WARN
+
+log4j.logger.com.facebook.smc.SmcUtil=INFO,console
67 copy-hdfs-jars-to-maven.sh
@@ -5,59 +5,68 @@
# and test) built in titan/VENDOR/hadoop-0.20/.
#
+set -e -u -o pipefail
BASEDIR=`dirname $0`
cd ${BASEDIR}
-if [ ! -f build/hadoop-0.20.1-dev-core.jar ]; then
- if [ ! -f build/hadoop-0.20-core.jar ]; then
- echo "core jar not found. Running 'ant jar'..."
- ant jar | grep BUILD;
- fi
+VERSION=$( ant -q print-version | head -1 | awk '{print $2}' )
+if [ -z "$VERSION" ]; then
+ echo "Unable to determine Hadoop version" >&2
+ exit 1
fi
-if [ ! -f build/hadoop-0.20.1-dev-test.jar ]; then
- if [ ! -f build/hadoop-0.20-test.jar ]; then
- echo "test jar not found. Running 'ant jar-test'..."
- ant jar-test | grep BUILD;
- fi
+TARGETS=""
+
+CORE_JAR=build/hadoop-$VERSION-core.jar
+if [ ! -f $CORE_JAR ]; then
+ TARGETS="$TARGETS jar"
fi
+CORE_POM=build/ivy/maven/generated.pom
+if [ ! -f $CORE_POM ]; then
+ TARGETS="$TARGETS makepom"
+fi
-#
-# The names of core/test jar name depend
-# on whether they were generated using
-# build_all.sh script or just the vanilla
-# simple ant jar/jar-test
-#
-if [ -f build/hadoop-0.20.1-dev-core.jar ]; then
- CORE_JAR=build/hadoop-0.20.1-dev-core.jar
-else
- CORE_JAR=build/hadoop-0.20-core.jar
+TEST_JAR=build/hadoop-$VERSION-test.jar
+if [ ! -f $TEST_JAR ]; then
+ TARGETS="$TARGETS jar-test"
fi
-if [ -f build/hadoop-0.20.1-dev-test.jar ]; then
- TEST_JAR=build/hadoop-0.20.1-dev-test.jar
-else
- TEST_JAR=build/hadoop-0.20-test.jar
+if [ -n "$TARGETS" ]; then
+ ant $TARGETS
fi
+# Clear the optional flag on Hadoop dependencies so these dependencies can be
+# included transitively in other projects.
+CORE_POM_MODIFIED=$CORE_POM.new
+./edit_generated_pom.py >$CORE_POM_MODIFIED
+
echo "** Publishing hadoop* core & test jars "
echo "** to "
echo "** your local maven repo (~/.m2/repository). "
echo "** HBase builds will pick up the HDFS* jars from the local maven repo."
-mvn install:install-file \
- -DgeneratePom=true \
+# When running under Commander, use the setting.xml file that specifies
+# the localRepository for a central mvn repo that can be shared between
+# all of the build/test agents
+OPTS=""
+if [[ -n "${COMMANDER_WORKSPACE:-}" || "$USER" == "svcscm" ]]; then
+ OPTS="-s /scm/git/electric/hadoop_builds/settings.xml"
+fi
+
+mvn $OPTS install:install-file \
+ -DpomFile=$CORE_POM_MODIFIED \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-core \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${CORE_JAR}
-mvn install:install-file \
+mvn $OPTS install:install-file \
-DgeneratePom=true \
-DgroupId=org.apache.hadoop \
-DartifactId=hadoop-test \
- -Dversion=0.20 \
+ -Dversion=$VERSION \
-Dpackaging=jar \
-Dfile=${TEST_JAR}
+
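Assuming the default local repository (~/.m2/repository) and a version of 0.20, the published artifacts can be sanity-checked as below; when the Commander settings.xml is passed via $OPTS, the localRepository it points at should be inspected instead:

    ls ~/.m2/repository/org/apache/hadoop/hadoop-core/0.20/
    # hadoop-core-0.20.jar  hadoop-core-0.20.pom
    ls ~/.m2/repository/org/apache/hadoop/hadoop-test/0.20/
    # hadoop-test-0.20.jar  hadoop-test-0.20.pom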
60 edit_generated_pom.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+'''
+Reads the automatically generated Hadoop pom file, removes the "optional"
+flag from dependencies so that they could be included transitively into other
+projects such as HBase, and removes certain dependencies that are not required
+and could even break the code (e.g. an old version of xerces). Writes the
+modified project object model XML to standard output.
+'''
+
+import os
+import re
+import sys
+
+from xml.dom.minidom import parse
+
+NON_TRANSITIVE_DEPS = [
+ # Old version, breaks HBase
+ 'xerces',
+
+ # Not used in production
+ 'checkstyle',
+ 'jdiff',
+
+ # A release audit tool, probably not used in prod
+ 'rat-lib',
+]
+
+POM_FILE = 'build/ivy/maven/generated.pom'
+doc = parse(POM_FILE)
+deps = doc.getElementsByTagName('dependencies')[0]
+
+for dep in deps.getElementsByTagName('dependency'):
+ for c in dep.childNodes:
+ if (c.nodeName == 'artifactId' and
+ c.firstChild and
+ c.firstChild.nodeValue and
+ c.firstChild.nodeValue.strip() in NON_TRANSITIVE_DEPS):
+ deps.removeChild(dep)
+ break
+
+ for o in dep.getElementsByTagName('optional'):
+ dep.removeChild(o)
+
+out_lines = doc.toprettyxml(indent=' ' * 2)
+lines = []
+for l in out_lines.split('\n'):
+ l = l.rstrip()
+ if l:
+ lines.append(l)
+output = '\n'.join(lines)
+
+# Make sure values stay on the same line: <element>value</element>
+output = re.sub(
+ r'(<([a-zA-Z]+)>)'
+ r'\s*([^<>]+?)\s*'
+ r'(</\2>)', r'\1\3\4', output)
+
+print output
+
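A quick way to check the script's effect (the output path is illustrative): after regenerating the pom, the rewritten copy should contain no optional flags and none of the excluded artifacts:

    ant makepom                                        # writes build/ivy/maven/generated.pom
    ./edit_generated_pom.py > /tmp/hadoop-core.pom
    grep -c '<optional>' /tmp/hadoop-core.pom          # expect 0
    grep -c '<artifactId>xerces' /tmp/hadoop-core.pom  # expect 0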
18 ivy.xml
@@ -33,7 +33,7 @@
extends="client,server,s3-server,kfs"/>
<conf name="mandatory" description="contains the critical dependencies"
- extends="commons-logging,log4j"/>
+ extends="commons-logging,log4j,guava"/>
<!--
These public configurations contain the core dependencies for running hadoop client or server.
@@ -67,6 +67,7 @@
<conf name="commons-logging" visibility="private"/>
<conf name="httpclient" visibility="private" extends="commons-logging"/>
<conf name="log4j" visibility="private"/>
+ <conf name="guava" visibility="private"/>
<conf name="lucene" visibility="private"/>
<conf name="jdiff" visibility="private" extends="log4j,s3-client,jetty,server"/>
<conf name="checkstyle" visibility="private"/>
@@ -149,6 +150,16 @@
name="jasper-compiler"
rev="${jasper.version}"
conf="jetty->master"/>
+
+ <dependency org="org.jboss.netty"
+ name="netty"
+ rev="${netty.version}"
+ conf="jetty->master"/>
+
+ <dependency org="org.json"
+ name="json"
+ rev="${json.version}"
+ conf="common->default"/>
<!-- this is resolved locally from the lib folder
<dependency org="tomcat"
name="jsp-api"
@@ -185,6 +196,11 @@
rev="${log4j.version}"
conf="log4j->master"/>
+ <dependency org="com.google.guava"
+ name="guava"
+ rev="${guava.version}"
+ conf="guava->master"/>
+
<!--Configuration: s3-client -->
<!--there are two jets3t projects in the repository; this one goes up to 0.6 and
is assumed to be the live one-->
6 ivy/libraries.properties
@@ -35,6 +35,8 @@ commons-net.version=1.4.1
core.version=3.1.1
coreplugin.version=1.3.2
+guava.version=r09
+
hsqldb.version=1.8.0.10
#ivy.version=2.0.0-beta2
@@ -51,7 +53,7 @@ jetty.jsp.version=6.1.14
jetty-util.version=6.1.26
junit.version=4.5
jdiff.version=1.0.9
-json.version=1.0
+json.version=20090211
kfs.version=0.1
@@ -60,6 +62,8 @@ lucene-core.version=2.3.1
mockito-all.version=1.8.2
+netty.version=3.2.2.Final
+
oro.version=2.0.8
rats-lib.version=0.5.1
117 singleNodeHadoop/coronaConf/capacity-scheduler.xml
@@ -0,0 +1,117 @@
+<?xml version="1.0"?>
+
+<!-- This is the configuration file for the resource manager in Hadoop. -->
+<!-- You can configure various scheduling parameters related to queues. -->
+<!-- The properties for a queue follow a naming convention,such as, -->
+<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.capacity</name>
+ <value>100</value>
+ <description>Percentage of the number of slots in the cluster that are
+ to be available for jobs in this queue.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
+ <value>-1</value>
+ <description>
+ maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+ This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+ The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
+ Default value of -1 implies a queue can use complete capacity of the cluster.
+
+ This property could be to curtail certain jobs which are long running in nature from occupying more than a
+ certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of
+ other queues being affected.
+
+ One important thing to note is that maximum-capacity is a percentage , so based on the cluster's capacity
+ the max capacity would change. So if large no of nodes or racks get added to the cluster , max Capacity in
+ absolute terms would increase accordingly.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
+ <value>false</value>
+ <description>If true, priorities of jobs will be taken into
+ account in scheduling decisions.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
+ <value>100</value>
+ <description> Each queue enforces a limit on the percentage of resources
+ allocated to a user at any given time, if there is competition for them.
+ This user limit can vary between a minimum and maximum value. The former
+ depends on the number of users who have submitted jobs, and the latter is
+ set to this property value. For example, suppose the value of this
+ property is 25. If two users have submitted jobs to a queue, no single
+ user can use more than 50% of the queue resources. If a third user submits
+ a job, no single user can use more than 33% of the queue resources. With 4
+ or more users, no user can use more than 25% of the queue's resources. A
+ value of 100 implies no user limits are imposed.
+ </description>
+ </property>
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
+ <value>2</value>
+ <description>The maximum number of jobs to be pre-initialized for a user
+ of the job queue.
+ </description>
+ </property>
+
+ <!-- The default configuration settings for the capacity task scheduler -->
+ <!-- The default values would be applied to all the queues which don't have -->
+ <!-- the appropriate property for the particular queue -->
+ <property>
+ <name>mapred.capacity-scheduler.default-supports-priority</name>
+ <value>false</value>
+ <description>If true, priorities of jobs will be taken into
+ account in scheduling decisions by default in a job queue.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
+ <value>100</value>
+ <description>The percentage of the resources limited to a particular user
+ for the job queue at any given point of time by default.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user</name>
+ <value>2</value>
+ <description>The maximum number of jobs to be pre-initialized for a user
+ of the job queue.
+ </description>
+ </property>
+
+
+ <!-- Capacity scheduler Job Initialization configuration parameters -->
+ <property>
+ <name>mapred.capacity-scheduler.init-poll-interval</name>
+ <value>5000</value>
+ <description>The amount of time in miliseconds which is used to poll
+ the job queues for jobs to initialize.
+ </description>
+ </property>
+ <property>
+ <name>mapred.capacity-scheduler.init-worker-threads</name>
+ <value>5</value>
+ <description>Number of worker threads which would be used by
+ Initialization poller to initialize jobs in a set of queue.
+ If number mentioned in property is equal to number of job queues
+ then a single thread would initialize jobs in a queue. If lesser
+ then a thread would get a set of queues assigned. If the number
+ is greater then number of threads would be equal to number of
+ job queues.
+ </description>
+ </property>
+
+</configuration>
24 singleNodeHadoop/coronaConf/configuration.xsl
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+ <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+ <td><xsl:value-of select="value"/></td>
+ <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
13 singleNodeHadoop/coronaConf/core-site.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://localhost:9000</value>
+</property>
+
+</configuration>
10 singleNodeHadoop/coronaConf/corona.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<configuration>
+ <defaultSchedulingMode>FAIR</defaultSchedulingMode>
+ <nodeLocalityWaitM>0</nodeLocalityWaitM>
+ <rackLocalityWaitM>5000</rackLocalityWaitM>
+ <preemptedTaskMaxRunningTime>60000</preemptedTaskMaxRunningTime>
+ <shareStarvingRatio>0.9</shareStarvingRatio>
+ <starvingTimeForShare>60000</starvingTimeForShare>
+ <starvingTimeForMinimum>30000</starvingTimeForMinimum>
+</configuration>
70 singleNodeHadoop/coronaConf/hadoop-env.sh
@@ -0,0 +1,70 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME. All others are
+# optional. When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use. Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+# Extra Java CLASSPATH elements. Optional.
+# export HADOOP_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options. Empty by default.
+# export HADOOP_OPTS=-server
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+export HADOOP_CORONACLUSTERMANAGER_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=8987 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false $HADOOP_CORONACLUSTERMANAGER_OPTS"
+export HADOOP_CORONAPROXYJOBTRACKER_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=8986 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false $HADOOP_CORONAPROXYJOBTRACKER_OPTS"
+
+# The only user who can start hadoop daemons.
+# If this is not set, any user can start hadoop daemons.
+export HADOOP_USERNAME=""
+
+# Java Runtime garbage collection options to pass to all Hadoop
+# servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
+# with a colon ; to which the dynamically generated gc log filename will
+# be appended to. The below defaults work for the Sun JVM, for example
+# in IBM GC, use '-Xverbosegclog:'.
+#export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
+
+export HADOOP_TASKTRACKER_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=8994 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options. Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# Where log files are stored. $HADOOP_HOME/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from. Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands. Unset by default. This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HADOOP_PID_DIR=/var/hadoop/pids
+
+# A string representing this instance of hadoop. $USER by default.
+# export HADOOP_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes. See 'man nice'.
+# export HADOOP_NICENESS=10
28 singleNodeHadoop/coronaConf/hadoop-metrics.properties
@@ -0,0 +1,28 @@
+# Configuration of the "dfs" context for JMX
+dfs.class=org.apache.hadoop.metrics.jmx.JMXContext
+dfs.period=10
+
+# Configuration of the "clustermanager" context for JMX
+clustermanager.class=org.apache.hadoop.metrics.jmx.JMXContext
+clustermanager.period=10
+
+# Configuration of the "proxyjobtracker" context for JMX
+proxyjobtracker.class=org.apache.hadoop.metrics.jmx.JMXContext
+proxyjobtracker.period=10
+
+# Configuration of the "mapred" context for JMX
+mapred.class=org.apache.hadoop.metrics.jmx.JMXContext
+mapred.period=10
+jmx_records=jobtracker,tasktracker,clustermanager,proxyjobtracker
+
+# Configuration of the "jvm" context for JMX
+jvm.class=org.apache.hadoop.metrics.jmx.JMXContext
+jvm.period=10
+
+# Configuration of the "rpc" context for JMX
+rpc.class=org.apache.hadoop.metrics.jmx.JMXContext
+rpc.period=10
+
+# Configuration of the "raidnode" context for JMX
+raidnode.class=org.apache.hadoop.metrics.jmx.JMXContext
+raidnode.period=10
97 singleNodeHadoop/coronaConf/hadoop-policy.xml
@@ -0,0 +1,97 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+ <property>
+ <name>security.client.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for ClientProtocol, which is used by user code
+ via the DistributedFileSystem.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.client.datanode.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
+ for block recovery.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.datanode.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for DatanodeProtocol, which is used by datanodes to
+ communicate with the namenode.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.inter.datanode.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+ for updating generation timestamp.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.namenode.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for NamenodeProtocol, the protocol used by the secondary
+ namenode to communicate with the namenode.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.inter.tracker.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for InterTrackerProtocol, used by the tasktrackers to
+ communicate with the jobtracker.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.job.submission.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for JobSubmissionProtocol, used by job clients to
+ communciate with the jobtracker for job submission, querying job status etc.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.task.umbilical.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for TaskUmbilicalProtocol, used by the map and reduce
+ tasks to communicate with the parent tasktracker.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+ <property>
+ <name>security.refresh.policy.protocol.acl</name>
+ <value>*</value>
+ <description>ACL for RefreshAuthorizationPolicyProtocol, used by the
+ dfsadmin and mradmin commands to refresh the security policy in-effect.
+ The ACL is a comma-separated list of user and group names. The user and
+ group list is separated by a blank. For e.g. "alice,bob users,wheel".
+ A special value of "*" means all users are allowed.</description>
+ </property>
+
+</configuration>
20 singleNodeHadoop/coronaConf/hdfs-site.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+ <name>dfs.support.append</name>
+ <value>true</value>
+ <description>Allow append support since we have the HDFS-200 patch and
+ need append/close support for HLog.java#splitLog</description>
+</property>
+
+<property>
+ <name>dfs.replication</name>
+ <value>1</value>
+</property>
+
+</configuration>
115 singleNodeHadoop/coronaConf/log4j.properties
@@ -0,0 +1,115 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+#
+# Job Summary Appender
+#
+# Use following logger to send summary to separate file defined by
+# hadoop.mapreduce.jobsummary.log.file rolled daily:
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+#
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=INFO
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
+
+#
+# Job Summary Appender
+#
+log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.JSA.DatePattern=.yyyy-MM-dd
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
31 singleNodeHadoop/coronaConf/mapred-queue-acls.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is a template file for queue acls configuration properties -->
+
+<configuration>
+
+<property>
+ <name>mapred.queue.default.acl-submit-job</name>
+ <value>*</value>
+ <description> Comma separated list of user and group names that are allowed
+ to submit jobs to the 'default' queue. The user list and the group list
+ are separated by a blank, e.g. alice,bob group1,group2.
+ If set to the special value '*', it means all users are allowed to
+ submit jobs.
+ </description>
+</property>
+
+<property>
+ <name>mapred.queue.default.acl-administer-jobs</name>
+ <value>*</value>
+ <description> Comma separated list of user and group names that are allowed
+ to delete jobs or modify job's priority for jobs not owned by the current
+ user in the 'default' queue. The user list and the group list
+ are separated by a blank, e.g. alice,bob group1,group2.
+ If set to the special value '*', it means all users are allowed to do
+ this operation.
+ </description>
+</property>
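+
+<!-- Illustrative example (not part of this template): to allow only users
+     alice and bob plus members of group1 to submit to the 'default' queue,
+     the submit ACL would be written as:
+     <value>alice,bob group1</value>
+-->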
+
+</configuration>
127 singleNodeHadoop/coronaConf/mapred-site.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.jobtracker.retirejob.interval</name>
+ <value>3600000</value>
+ <description>Time after which completed jobs are retired; set here to one hour (3600000 msec).</description>
+</property>
+
+<!--
+<property>
+ <name>mapred.task.tracker.netty.use</name>
+ <value>false</value>
+</property>
+-->
+<property>
+ <name>mapred.job.tracker.http.address</name>
+ <value>0.0.0.0:0</value>
+ <description>JobTracker HTTP address. For Corona, bind to any free port.
+ </description>
+</property>
+<property>
+ <name>cm.notifier.retry.max</name>
+ <value>2</value>
+ <description>Max number of retries from CM to a session client
+ </description>
+</property>
+<property>
+ <name>cm.session.expiryinterval</name>
+ <value>120000</value>
+ <description>The interval in msec after which a session client is
+ timed out
+ </description>
+</property>
+<property>
+ <name>cm.server.address</name>
+ <value>localhost:50031</value>
+ <description>The host and port that the Corona ClusterManager runs on.
+ </description>
+</property>
+<property>
+ <name>cm.server.http.address</name>
+ <value>localhost:50032</value>
+ <description>The host and port for Corona ClusterManager web server
+ </description>
+</property>
+<property>
+ <name>mapred.job.tracker.corona.proxyaddr</name>
+ <value>localhost:8080</value>
+ <description>The host and port for the Corona Proxy Job Tracker web UI
+ </description>
+</property>
+<property>
+ <name>corona.proxy.job.tracker.rpcaddr</name>
+ <value>localhost:50033</value>
+ <description>The host and port for Corona Proxy Job Tracker RPC server
+ </description>
+</property>
+<property>
+ <name>cm.cpu.to.resource.partitioning</name>
+ <value>{"1":{"JOBTRACKER":2,"MAP":2,"REDUCE":2},"24":{"JOBTRACKER":3,"MAP":17,"REDUCE":9}}</value>
+ <description>CPU-to-resource allocation mapping:
+ for 1 CPU, grant 2 JTs, 2 mappers and 2 reducers;
+ for 24 CPUs, grant 3 JTs, 17 mappers and 9 reducers.
+ </description>
+</property>
+<property>
+ <name>mapred.job.tracker</name>
+ <value>corona</value>
+</property>
+<property>
+ <name>mapred.job.tracker.class</name>
+ <value>org.apache.hadoop.mapred.CoronaJobTracker</value>
+</property>
+
+<!--
+ <property>
+ <name>mapred.job.tracker</name>
+ <value>localhost:50300</value>
+ </property>
+-->
+
+ <property>
+ <name>mapred.task.tracker.netty.use</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>mapred.job.tracker.http.address</name>
+ <!-- cluster variant -->
+ <value>localhost:50030</value>
+ <description>No description</description>
+ <final>true</final>
+ </property>
+
+<property>
+ <name>mapred.max.tracker.blacklists</name>
+ <value>4</value>
+ <description>The number of errors a tasktracker may accumulate across
+ different jobs before it can be blacklisted for all jobs. The tracker
+ will be given tasks again later (after
+ mapred.tasktracker.blacklist.reevaluation.interval msec), and it becomes
+ a healthy tracker again after a restart.
+ </description>
+</property>
+
+<property>
+ <name>mapred.tasktracker.blacklist.reevaluation.interval</name>
+ <value>86400000</value>
+ <description>If a task server has been blacklisted for making
+ mapred.max.tracker.blacklists errors and it has been there for at least
+ this long (in msec), its name will be cleared, and it may be unblacklisted
+ (assuming there aren't any other reasons to keep it there).
+ </description>
+</property>
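+
+<!-- Worked example (illustrative): with the two values above, a tracker that
+     accumulates 4 faults across different jobs is blacklisted for all jobs;
+     after it has been blacklisted for 86400000 msec (24 hours) its name may
+     be cleared and it can be given tasks again. -->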
+
+<property>
+ <name>webinterface.private.actions</name>
+ <value>true</value>
+</property>
+
+</configuration>
1 singleNodeHadoop/coronaConf/masters
@@ -0,0 +1 @@
+localhost
1 singleNodeHadoop/coronaConf/proxyjtnode
@@ -0,0 +1 @@
+localhost
1 singleNodeHadoop/coronaConf/slaves
@@ -0,0 +1 @@
+localhost
57 singleNodeHadoop/coronaConf/ssl-client.xml.example
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+ <name>ssl.client.truststore.location</name>
+ <value></value>
+ <description>Truststore to be used by clients like distcp. Must be
+ specified.
+ </description>
+</property>
+
+<property>
+ <name>ssl.client.truststore.password</name>
+ <value></value>
+ <description>Optional. Default value is "".
+ </description>
+</property>
+
+<property>
+ <name>ssl.client.truststore.type</name>
+ <value>jks</value>
+ <description>Optional. Default value is "jks".
+ </description>
+</property>
+
+<property>
+ <name>ssl.client.keystore.location</name>
+ <value></value>
+ <description>Keystore to be used by clients like distcp. Must be
+ specified.
+ </description>
+</property>
+
+<property>
+ <name>ssl.client.keystore.password</name>
+ <value></value>
+ <description>Optional. Default value is "".
+ </description>
+</property>
+
+<property>
+ <name>ssl.client.keystore.keypassword</name>
+ <value></value>
+ <description>Optional. Default value is "".
+ </description>
+</property>
+
+<property>
+ <name>ssl.client.keystore.type</name>
+ <value>jks</value>
+ <description>Optional. Default value is "jks".
+ </description>
+</property>
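+
+<!-- Illustrative example (paths are hypothetical, not defaults): a minimal
+     client setup only needs the two *.location properties above to point at
+     JKS files readable by the user running distcp, e.g.
+     /etc/hadoop/ssl/client-truststore.jks and /etc/hadoop/ssl/client-keystore.jks;
+     the optional properties can keep their defaults. -->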
+
+</configuration>
55 singleNodeHadoop/coronaConf/ssl-server.xml.example
@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+ <name>ssl.server.truststore.location</name>
+ <value></value>
+ <description>Truststore to be used by NN and DN. Must be specified.
+ </description>
+</property>
+
+<property>
+ <name>ssl.server.truststore.password</name>
+ <value></value>
+ <description>Optional. Default value is "".
+ </description>
+</property>
+
+<property>
+ <name>ssl.server.truststore.type</name>
+ <value>jks</value>
+ <description>Optional. Default value is "jks".
+ </description>
+</property>
+
+<property>
+ <name>ssl.server.keystore.location</name>
+ <value></value>
+ <description>Keystore to be used by NN and DN. Must be specified.
+ </description>
+</property>
+
+<property>
+ <name>ssl.server.keystore.password</name>
+ <value></value>
+ <description>Must be specified.
+ </description>
+</property>
+
+<property>
+ <name>ssl.server.keystore.keypassword</name>
+ <value></value>
+ <description>Must be specified.
+ </description>
+</property>
+
+<property>
+ <name>ssl.server.keystore.type</name>
+ <value>jks</value>
+ <description>Optional. Default value is "jks".
+ </description>
+</property>
+
+</configuration>
4 singleNodeHadoop/coronaConf/taskcontroller.cfg
@@ -0,0 +1,4 @@
+mapred.local.dir=#configured value of hadoop.tmp.dir; it can be a comma-separated list of paths
+hadoop.pid.dir=#configured HADOOP_PID_DIR
+hadoop.indent.str=#configured HADOOP_IDENT_STR
+mapred.tasktracker.tasks.sleeptime-before-sigkill=#sleep time, in seconds, before SIGKILL is sent to the process group after SIGTERM is sent
117 singleNodeHadoop/mapredConf/capacity-scheduler.xml
@@ -0,0 +1,117 @@
+<?xml version="1.0"?>
+
+<!-- This is the configuration file for the resource manager in Hadoop. -->
+<!-- You can configure various scheduling parameters related to queues. -->
+<!-- The properties for a queue follow a naming convention,such as, -->
+<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
+
+<configuration>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.capacity</name>
+ <value>100</value>
+ <description>Percentage of the number of slots in the cluster that are
+ to be available for jobs in this queue.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
+ <value>-1</value>
+ <description>
+ maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+ This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+ The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
+ The default value of -1 implies a queue can use the complete capacity of the cluster.
+
+ This property can be used to keep long-running jobs from occupying more than a
+ certain percentage of the cluster, which, in the absence of pre-emption, could
+ affect the capacity guarantees of other queues.
+
+ Note that maximum-capacity is a percentage, so the cap is relative to the cluster's
+ capacity: if a large number of nodes or racks is added to the cluster, the maximum
+ capacity in absolute terms increases accordingly.
+ </description>
+ </property>
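+
+ <!-- Worked example (illustrative): if maximum-capacity were set to 40 on a
+      cluster with 100 map slots, this queue could use at most 40 of them;
+      if the cluster later grows to 200 slots, the same 40% cap becomes 80
+      slots in absolute terms. -->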
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
+ <value>false</value>
+ <description>If true, priorities of jobs will be taken into
+ account in scheduling decisions.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
+ <value>100</value>
+ <description> Each queue enforces a limit on the percentage of resources
+ allocated to a user at any given time, if there is competition for them.
+ This user limit can vary between a minimum and maximum value. The former
+ depends on the number of users who have submitted jobs, and the latter is
+ set to this property value. For example, suppose the value of this
+ property is 25. If two users have submitted jobs to a queue, no single
+ user can use more than 50% of the queue resources. If a third user submits
+ a job, no single user can use more than 33% of the queue resources. With 4
+ or more users, no user can use more than 25% of the queue's resources. A
+ value of 100 implies no user limits are imposed.
+ </description>
+ </property>
+ <property>
+ <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
+ <value>2</value>
+ <description>The maximum number of jobs to be pre-initialized for a user
+ of the job queue.
+ </description>
+ </property>
+
+ <!-- The default configuration settings for the capacity task scheduler -->
+ <!-- The default values would be applied to all the queues which don't have -->
+ <!-- the appropriate property for the particular queue -->
+ <property>
+ <name>mapred.capacity-scheduler.default-supports-priority</name>
+ <value>false</value>
+ <description>If true, priorities of jobs will be taken into
+ account in scheduling decisions by default in a job queue.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
+ <value>100</value>
+ <description>The percentage of the resources limited to a particular user
+ for the job queue at any given point of time by default.
+ </description>
+ </property>
+
+ <property>
+ <name>mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user</name>
+ <value>2</value>
+ <description>The maximum number of jobs to be pre-initialized for a user
+ of the job queue.
+ </description>
+ </property>
+
+
+ <!-- Capacity scheduler Job Initialization configuration parameters -->
+ <property>
+ <name>mapred.capacity-scheduler.init-poll-interval</name>
+ <value>5000</value>
+ <description>The amount of time in milliseconds which is used to poll
+ the job queues for jobs to initialize.
+ </description>
+ </property>
+ <property>
+ <name>mapred.capacity-scheduler.init-worker-threads</name>
+ <value>5</value>
+ <description>Number of worker threads used by the initialization poller to
+ initialize jobs in a set of queues. If this number equals the number of job
+ queues, a single thread initializes the jobs in each queue. If it is smaller,
+ each thread is assigned a set of queues. If it is larger, the number of
+ threads used equals the number of job queues.
+ </description>
+ </property>
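+
+ <!-- Worked example (illustrative): with the value 5 above, a site with 3 job
+      queues would use only 3 poller threads (one per queue), while a site
+      with 10 queues would share them across the 5 threads, roughly 2 queues
+      per thread. -->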
+
+</configuration>
24 singleNodeHadoop/mapredConf/configuration.xsl
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+ <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+ <td><xsl:value-of select="value"/></td>
+ <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
23 singleNodeHadoop/mapredConf/core-site.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://localhost:9000</value>
+</property>
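+
+<!-- Illustrative example (hostname is hypothetical): on a real cluster this
+     would point at the NameNode's RPC address, e.g.
+     hdfs://namenode.example.com:9000 -->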
+
+<property>
+ <name>webinterface.private.actions</name>
+ <value>true</value>
+ <description> If set to true, the web interfaces of JT and NN may contain
+ actions, such as kill job, delete file, etc., that should
+ not be exposed to public. Enable this option if the interfaces
+ are only reachable by those who have the right authorization.
+ </description>
+</property>
+
+</configuration>
10 singleNodeHadoop/mapredConf/corona.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<configuration>
+ <defaultSchedulingMode>FAIR</defaultSchedulingMode>
+ <nodeLocalityWaitM>0</nodeLocalityWaitM>
+ <rackLocalityWaitM>5000</rackLocalityWaitM>
+ <preemptedTaskMaxRunningTime>60000</preemptedTaskMaxRunningTime>
+ <shareStarvingRatio>0.9</shareStarvingRatio>
+ <starvingTimeForShare>60000</starvingTimeForShare>
+ <starvingTimeForMinimum>30000</starvingTimeForMinimum>
+</configuration>
68 singleNodeHadoop/mapredConf/hadoop-env.sh
@@ -0,0 +1,68 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME. All others are
+# optional. When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use. Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+# Extra Java CLASSPATH elements. Optional.
+# export HADOOP_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options. Empty by default.