Updated to latest.

commit d82610fd5883b903ad5bf1a5dca1f18274f28391 1 parent 7825058
@aching authored
Showing with 22,392 additions and 10,629 deletions.
  1. +41 −11 bin/hadoop
  2. +3 −2 bin/start-corona.sh
  3. 0  bin/start-fsshellservice.sh
  4. +2 −8 bin/start-multitasktracker.sh
  5. +34 −0 bin/start-namespace-notifier.sh
  6. +1 −1  bin/start-raidnode.sh
  7. 0  bin/stop-fsshellservice.sh
  8. +31 −0 bin/stop-namespace-notifier.sh
  9. +54 −9 build.xml
  10. +57 −51 conf/hadoop-env.sh
  11. +57 −51 conf/hadoop-env.sh.template
  12. +1 −1  conf/log4j.properties
  13. +1 −1  conf/log4j.properties.scribeappender
  14. 0  edit_generated_pom.py
  15. +213 −0 hdfs-autoconf/README.md
  16. +133 −0 hdfs-autoconf/avatar-datanode-start
  17. +65 −0 hdfs-autoconf/avatar-format
  18. +61 −0 hdfs-autoconf/avatar-one-start
  19. +67 −0 hdfs-autoconf/avatar-zero-start
  20. +48 −0 hdfs-autoconf/build.sh
  21. +2 −0  hdfs-autoconf/config-meta/avatar-datanode-1.sed
  22. +2 −0  hdfs-autoconf/config-meta/avatar-datanode-2.sed
  23. +6 −0 hdfs-autoconf/config-meta/avatar-datanode.template
  24. +5 −0 hdfs-autoconf/config-meta/avatar-one.sed
  25. +10 −0 hdfs-autoconf/config-meta/avatar-shared.sed
  26. +5 −0 hdfs-autoconf/config-meta/avatar-zero.sed
  27. +115 −0 hdfs-autoconf/config-templates/avatar-site.xml.template
  28. +95 −0 hdfs-autoconf/config-templates/core-site.xml.template
  29. +53 −0 hdfs-autoconf/config-templates/format-avatardatanode.sh.template
  30. +6 −0 hdfs-autoconf/config-templates/format-avatarnode-local-dir.sh.template
  31. +7 −0 hdfs-autoconf/config-templates/format-avatarnode-shared-dir.sh.template
  32. +71 −0 hdfs-autoconf/config-templates/hadoop-env-avatar-one.sh
  33. +71 −0 hdfs-autoconf/config-templates/hadoop-env-avatar-zero.sh
  34. +71 −0 hdfs-autoconf/config-templates/hadoop-env-datanode.sh
  35. +118 −0 hdfs-autoconf/config-templates/hdfs-site.xml.template
  36. +12 −0 hdfs-autoconf/config-templates/run-datanode.sh
  37. +12 −0 hdfs-autoconf/config-templates/run-one.template
  38. +12 −0 hdfs-autoconf/config-templates/run-zero.template
  39. +25 −0 hdfs-autoconf/config-templates/zoo.cfg
  40. +57 −0 hdfs-autoconf/config.sh
  41. +77 −0 hdfs-autoconf/scripts/common.sh
  42. +82 −0 hdfs-autoconf/scripts/gen-avatar
  43. +58 −0 hdfs-autoconf/scripts/gen-datanode
  44. +104 −0 hdfs-autoconf/start-dev-cluster.sh
  45. +40 −0 hdfs-autoconf/stop-dev-cluster.sh
  46. +53 −0 hdfs-autoconf/zookeeper.sh
  47. +1 −1  ivy.xml
  48. BIN  ivy/{ivy-2.0.0-rc2.jar → ivy-2.1.0.jar}
  49. +2 −2 ivy/libraries.properties
  50. BIN  lib/zookeeper-3.3.1.jar
  51. BIN  lib/zookeeper-3.4.3.jar
  52. +4 −2 singleNodeHadoop/coronaConf/corona.xml
  53. +31 −0 singleNodeHadoop/coronaConf/coronapools.xml
  54. +38 −1 singleNodeHadoop/coronaConf/mapred-site.xml
  55. +47 −0 singleNodeHadoop/mapredConf/mapred-site.xml
  56. +3 −0  singleNodeHadoop/mapredConf/pools.xml
  57. +4 −1 singleNodeHadoop/singleNodeSwitch.sh
  58. +0 −43 src/contrib/benchmark/build.xml
  59. +0 −46 src/contrib/benchmark/ivy.xml
  60. +0 −4 src/contrib/benchmark/ivy/libraries.properties
  61. +0 −45 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/AllTestDriver.java
  62. +0 −16 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/Constant.java
  63. +0 −174 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DFSDirTest.java
  64. +0 −452 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DFSGeneralTest.java
  65. +0 −199 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DFSIOTest.java
  66. +0 −256 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DFSLockTest.java
  67. +0 −187 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DataGenerator.java
  68. +0 −30 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/DirConstant.java
  69. +0 −93 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/FileNameGenerator.java
  70. +0 −41 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/GeneralConstant.java
  71. +0 −469 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/LoadGenerator.java
  72. +0 −1,357 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/NNThroughputBenchmark.java
  73. +0 −307 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/StructureGenerator.java
  74. +0 −49 src/contrib/benchmark/src/java/org/apache/hadoop/hdfs/TestNNThroughputBenchmark.java
  75. +0 −61 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/CreateFiles.java
  76. +0 −605 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/DatanodeBenThread.java
  77. +0 −114 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/DirMapper.java
  78. +0 −107 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/DirReduce.java
  79. +0 −79 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenMapper.java
  80. +0 −237 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenReaderThread.java
  81. +0 −137 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenReduce.java
  82. +0 −154 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenThread.java
  83. +0 −331 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/GenWriterThread.java
  84. +0 −69 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/MultiTaskTracker.java
  85. +0 −68 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/ReadMapper.java
  86. +0 −99 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/Reduce.java
  87. +0 −262 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/SleepJobRunner.java
  88. +0 −102 src/contrib/benchmark/src/java/org/apache/hadoop/mapred/WriteMapper.java
  89. +0 −108 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/CuckooHash.java
  90. +0 −114 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/DoubleHash.java
  91. +0 −248 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/HashTableBenchmark.java
  92. +0 −119 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/Hashes.java
  93. +0 −231 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/LightWeightGSet.java
  94. +0 −252 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/LightWeightGSetMulti.java
  95. +0 −45 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/MT.java
  96. +0 −110 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/QuadHash.java
  97. +0 −69 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/R250_521.java
  98. +0 −82 src/contrib/benchmark/src/test/org/apache/hadoop/hashtable/TestHashTableBenchmark.java
  99. +6 −3 src/contrib/build-contrib.xml
  100. +16 −11 src/contrib/build.xml
  101. +17 −0 src/contrib/corona/README.txt
  102. +1 −0  src/contrib/corona/build.xml
  103. +86 −12 src/contrib/corona/interface/ClusterManager.thrift
  104. +4 −1 src/contrib/corona/ivy/libraries.properties
  105. +8,135 −1,224 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ClusterManagerService.java
  106. +46 −46 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/ClusterNodeInfo.java
  107. +1,234 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/CoronaProxyJobTrackerService.java
  108. +411 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/HeartbeatArgs.java
  109. +322 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/InvalidPoolInfo.java
  110. +323 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/NodeHeartbeatResponse.java
  111. +414 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/PoolInfoStrings.java
  112. +411 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/RestartNodesArgs.java
  113. +224 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/RestartNodesResponse.java
  114. +932 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/RunningSession.java
  115. +224 −0 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SafeModeException.java
  116. +18 −18 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionDriverService.java
  117. +48 −47 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionInfo.java
  118. +50 −49 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionRegistrationData.java
  119. +4 −1 src/contrib/corona/src/gen-java/org/apache/hadoop/corona/SessionStatus.java
  120. +56 −0 src/contrib/corona/src/java/error.xml
  121. +372 −41 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManager.java
  122. +93 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManagerAvailabilityChecker.java
  123. +38 −3 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManagerMetrics.java
  124. +34 −32 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterManagerServer.java
  125. +266 −56 src/contrib/corona/src/java/org/apache/hadoop/corona/ClusterNode.java
  126. +836 −172 src/contrib/corona/src/java/org/apache/hadoop/corona/ConfigManager.java
  127. +139 −1 src/contrib/corona/src/java/org/apache/hadoop/corona/CoronaAdmin.java
  128. +211 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/CoronaClient.java
  129. +189 −9 src/contrib/corona/src/java/org/apache/hadoop/corona/CoronaConf.java
  130. +145 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/CoronaNodeRestarter.java
  131. +48 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/EmptyPoolsConfigDocumentGenerator.java
  132. +1 −1  src/contrib/corona/src/java/org/apache/hadoop/corona/FaultManager.java
  133. +70 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/LoadManager.java
  134. +101 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/NodeContainer.java
  135. +613 −197 src/contrib/corona/src/java/org/apache/hadoop/corona/NodeManager.java
  136. +111 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/NodeSnapshot.java
  137. +268 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/PoolGroupManager.java
  138. +203 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/PoolGroupSchedulable.java
  139. +266 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/PoolInfo.java
  140. +37 −15 src/contrib/corona/src/java/org/apache/hadoop/corona/{PoolMetrics.java → PoolInfoMetrics.java}
  141. +0 −212 src/contrib/corona/src/java/org/apache/hadoop/corona/PoolManager.java
  142. +49 −23 src/contrib/corona/src/java/org/apache/hadoop/corona/PoolSchedulable.java
  143. +24 −44 .../hashtable/LongInfo.java → corona/src/java/org/apache/hadoop/corona/PoolsConfigDocumentGenerator.java}
  144. +77 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/RequestedNode.java
  145. +121 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/ResourceLimit.java
  146. +107 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/ResourceRequestInfo.java
  147. +77 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/ResourceTypeProperties.java
  148. +25 −1 src/contrib/corona/src/java/org/apache/hadoop/corona/RetiredSession.java
  149. +15 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/Schedulable.java
  150. +35 −24 src/contrib/corona/src/java/org/apache/hadoop/corona/Scheduler.java
  151. +444 −173 src/contrib/corona/src/java/org/apache/hadoop/corona/SchedulerForType.java
  152. +707 −77 src/contrib/corona/src/java/org/apache/hadoop/corona/Session.java
  153. +389 −37 src/contrib/corona/src/java/org/apache/hadoop/corona/SessionDriver.java
  154. +9 −27 src/contrib/corona/src/java/org/apache/hadoop/corona/SessionHistoryManager.java
  155. +408 −86 src/contrib/corona/src/java/org/apache/hadoop/corona/SessionManager.java
  156. +112 −3 src/contrib/corona/src/java/org/apache/hadoop/corona/SessionNotificationCtx.java
  157. +181 −11 src/contrib/corona/src/java/org/apache/hadoop/corona/SessionNotifier.java
  158. +19 −5 src/contrib/corona/src/java/org/apache/hadoop/corona/SessionSchedulable.java
  159. +39 −13 src/contrib/corona/src/java/org/apache/hadoop/corona/TFactoryBasedThreadPoolServer.java
  160. +0 −99 src/contrib/corona/src/java/org/apache/hadoop/corona/TopologyCache.java
  161. +84 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/TypePoolGroupNameMap.java
  162. +84 −0 src/contrib/corona/src/java/org/apache/hadoop/corona/TypePoolInfoMap.java
  163. +67 −3 src/contrib/corona/src/java/org/apache/hadoop/corona/Utilities.java
  164. +46 −0 src/contrib/corona/src/java/org/apache/hadoop/mapred/ClusterManagerSafeModeProtocol.java
  165. +5 −0 src/contrib/corona/src/java/org/apache/hadoop/mapred/CoronaJobAggregator.java
  166. +77 −139 src/contrib/corona/src/java/org/apache/hadoop/mapred/CoronaJobInProgress.java
Sorry, we could not display the entire diff because too many files (927) changed.
52 bin/hadoop
@@ -72,6 +72,7 @@ if [ $# = 0 ]; then
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oev apply the offline edits viewer to an edits file"
+ echo " oid apply the offline fsimage decompressor to an fsimage"
echo " Use -help to see options"
echo " jobtracker run the MapReduce job Tracker node"
echo " pipes run a Pipes job"
@@ -123,6 +124,7 @@ fi
# CLASSPATH initially contains $HADOOP_CONF_DIR
JMX_OPTS=""
CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$HADOOP_CLASSPATH
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
# for developers, add Hadoop classes to CLASSPATH
@@ -175,12 +177,6 @@ for f in $HADOOP_HOME/build/hadoop-*-tools.jar; do
TOOL_PATH=${TOOL_PATH}:$f;
done
-# add user-specified CLASSPATH before corona so that a newer
-# corona jar can be specified to override the deployed one
-if [ "$HADOOP_CLASSPATH" != "" ]; then
- CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
-fi
-
# CORONA_PATH for corona daemons
if [ -d "$HADOOP_HOME/build/contrib/corona/classes" ]; then
CORONA_PATH=${CORONA_PATH}:$HADOOP_HOME/build/contrib/corona/classes
@@ -198,6 +194,15 @@ for f in $HADOOP_HOME/contrib/corona/lib/*.jar; do
CORONA_LIB_PATH=${CORONA_LIB_PATH}:$f;
done
+# NOTIFIER_PATH for the namespace notifier server daemon
+if [ -d "$HADOOP_HOME/build/contrib/namespace-notifier/classes" ]; then
+ NOTIFIER_PATH=${NOTIFIER_PATH}:$HADOOP_HOME/build/contrib/namespace-notifier/classes
+fi
+
+for f in $HADOOP_HOME/contrib/namespace-notifier/*.jar; do
+ NOTIFIER_PATH=${NOTIFIER_PATH}:$f;
+done
+
# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_HOME/logs"
@@ -243,6 +248,17 @@ elif [ "$COMMAND" = "avatarnode" ] ; then
elif [ "$COMMAND" = "secondarynamenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
+elif [ "$COMMAND" = "raidnode" ] ; then
+ CLASS='org.apache.hadoop.raid.RaidNode'
+ JMX_OPTS=$HADOOP_JMX_RAIDNODE_OPTS
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS"
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+elif [ "$COMMAND" = "notifier" ] ; then
+ CLASS='org.apache.hadoop.hdfs.notifier.server.ServerCore'
+ if [ "$NOTIFIER_PATH" != "" ]; then
+ CLASSPATH=${CLASSPATH}:${NOTIFIER_PATH}
+ fi
+ JMX_OPTS="$JMX_OPTS $NOTIFIER_JMX_OPTS"
elif [ "$COMMAND" = "fsshellservice" ] ; then
CLASS='org.apache.hadoop.hdfs.fsshellservice.FsShellServiceImpl'
if [ -d "$HADOOP_HOME/build/contrib/corona/lib" ]; then
@@ -309,6 +325,9 @@ elif [ "$COMMAND" = "oiv" ] ; then
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "oid" ] ; then
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageDecompressor
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -324,17 +343,26 @@ elif [ "$COMMAND" = "coronaclustermanager" ] ; then
JMX_OPTS=$HADOOP_JMX_CORONACLUSTERMANAGER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONACLUSTERMANAGER_OPTS"
# Corona lib path should be first to ensure that it uses the right thrift JAR
- CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+ CLASSPATH=${CORONA_LIB_PATH}:${CLUSTER_MANAGER_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronatasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.CoronaTaskTracker
JMX_OPTS=$HADOOP_JMX_TASKTRACKER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_TASKTRACKER_OPTS"
+ # For corona task trackers, the tasks should not get the thrift library.
+ MAPREDUCE_TASK_SYSTEM_CLASSPATH=${CLASSPATH}
+ export MAPREDUCE_TASK_SYSTEM_CLASSPATH
# See coronaclustermanager comment
CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaproxyjobtracker" ] ; then
CLASS=org.apache.hadoop.mapred.ProxyJobTracker
JMX_OPTS=$HADOOP_JMX_CORONAPROXYJOBTRACKER_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_GC_LOG_OPTS $HADOOP_CORONAPROXYJOBTRACKER_OPTS"
+ # See coronaclustermanager comment
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
+elif [ "$COMMAND" = "coronaclient" ] ; then
+ CLASS=org.apache.hadoop.corona.CoronaClient
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}
elif [ "$COMMAND" = "coronaadmin" ] ; then
CLASS=org.apache.hadoop.corona.CoronaAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -350,9 +378,10 @@ elif [ "$COMMAND" = "tasktracker" ] ; then
elif [ "$COMMAND" = "multitasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.MultiTaskTracker
HADOOP_ROOT_LOGGER=${HADOOP_TASKTRACKER_LOGGER:-$HADOOP_ROOT_LOGGER}
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_MULTITASKTRACKER_OPTS"
# This should be the number of tasktrackers
- if [ -n "$MULTI_TT_SIZE" ] ; then
- CMDLINE_OPTS="$MULTI_TT_SIZE"
+ if [ -n "$MULTI_TT_OPTIONS" ] ; then
+ CMDLINE_OPTS="$MULTI_TT_OPTIONS"
else
CMDLINE_OPTS="1"
fi
@@ -374,7 +403,7 @@ elif [ "$COMMAND" = "fastcopy" ] ; then
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+ CLASSPATH=${CORONA_LIB_PATH}:${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
@@ -463,4 +492,5 @@ if [ "$HADOOP_DEPLOYMENT" == "server" ]; then
fi
# run it
-exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $JMX_OPTS -classpath "$CLASSPATH" $CLASS $CMDLINE_OPTS "$@"
+export CLASSPATH
+exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $JMX_OPTS "-Dfb_hadoop_version=0.20" $CLASS $CMDLINE_OPTS "$@"
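For quick reference, the entry points wired up or re-routed by this change map to commands as follows (invocations shown without arguments, which vary per tool):

```bash
bin/hadoop oid           # OfflineImageDecompressor, the new offline fsimage decompressor
bin/hadoop notifier      # namespace notifier server daemon (ServerCore)
bin/hadoop raidnode      # RaidNode, now started via its own command name
bin/hadoop coronaclient  # CoronaClient entry point, run with the corona lib path prepended
```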
5 bin/start-corona.sh
@@ -24,7 +24,8 @@ bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
# start corona daemons
-# start clustermanager first to minimize connection errors at startup
-"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
+# start start-proxyjt.sh first so that clustermanager can be started correctly
"$bin"/start-proxyjt.sh --config $HADOOP_CONF_DIR
+sleep 1
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start coronaclustermanager
"$bin"/hadoop-daemons.sh --config $HADOOP_CONF_DIR start coronatasktracker
0  bin/start-fsshellservice.sh 100755 → 100644
File mode changed
10 bin/start-multitasktracker.sh
@@ -30,13 +30,7 @@ for f in "$bin"/../build/contrib/*/*.jar; do
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$f;
done
-export HADOOP_OPTS="$HADOOP_OPTS -Dcom.sun.management.jmxremote
--Dcom.sun.management.jmxremote.port=8697 \
--Dcom.sun.management.jmxremote.authenticate=false
--Dcom.sun.management.jmxremote.ssl=false \
--verbose:gc -XX:+PrintGCDateStamps -XX:+PrintGCDetails \
--Xloggc:/usr/local/hadoop/logs/MRSIM/multitasktracker.gc.log \
--XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC"
+export HADOOP_MULTITASKTRACKER_OPTS=" -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=8697 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -verbose:gc -XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:/usr/local/hadoop/logs/MRSIM/multitasktracker.gc.log -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC"
# start mapred daemons
# start jobtracker first to minimize connection errors at startup
-"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start multitasktracker
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start multitasktracker
34 bin/start-namespace-notifier.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+usage="Usage: start-namespace-notifier.sh"
+
+params=$#
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+. "$bin"/../conf/hadoop-env.sh
+
+# get arguments
+if [ $# -ge 1 ]; then
+ echo $usage
+fi
+
+export NOTIFIER_JMX_OPTS=" -Dcom.sun.management.jmxremote.port=$NOTIFIER_JMX_PORT -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start notifier
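The script builds its JMX options from `$NOTIFIER_JMX_PORT`, which it expects to be exported beforehand (for example from `conf/hadoop-env.sh`); a hypothetical invocation, with an illustrative port number:

```bash
export NOTIFIER_JMX_PORT=8999        # port value is an assumption; pick any free port
bin/start-namespace-notifier.sh      # starts the notifier daemon
bin/stop-namespace-notifier.sh       # stops it again
```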
2  bin/start-raidnode.sh
@@ -37,4 +37,4 @@ if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
fi
export HADOOP_DAEMON_OPTS=$HADOOP_RAIDNODE_OPTS
-"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start org.apache.hadoop.raid.RaidNode
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start raidnode
0  bin/stop-fsshellservice.sh 100755 → 100644
File mode changed
31 bin/stop-namespace-notifier.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+usage="Usage: stop-namespace-notifier.sh"
+
+params=$#
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# get arguments
+if [ $# -ge 1 ]; then
+ echo $usage
+fi
+
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop notifier
63 build.xml
@@ -107,7 +107,7 @@
<property name="test.junit.fork.mode" value="perTest" />
<property name="test.junit.printsummary" value="yes" />
<property name="test.junit.haltonfailure" value="no" />
- <property name="test.junit.maxmemory" value="512m" />
+ <property name="test.junit.maxmemory" value="1024m" />
<property name="test.tools.input.dir" value="${basedir}/src/test/tools/data"/>
<!-- The default user.home might have lots of dirs that will slow down
tets that scan user.home, so allow a way to specify a custom,
@@ -229,6 +229,14 @@
</and>
</condition>
+ <!-- Indicate is Snappy native library should be bundled with Hadoop or not -->
+ <property name="bundle.snappy" value="true"/>
+
+ <!-- Snappy native library location -->
+ <property name="snappy.prefix" value="${native.src.dir}/lib/snappy"/>
+ <property name="snappy.lib" value="${snappy.prefix}/lib"/>
+ <property name="snappy.include" value="${snappy.prefix}/include"/>
+
<!-- the normal classpath -->
<path id="classpath">
<pathelement location="${build.classes}"/>
@@ -499,6 +507,7 @@
<mkdir dir="${build.native}/lib"/>
<mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/zlib"/>
+ <mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/snappy"/>
<mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/lzma"/>
<mkdir dir="${build.native}/src/org/apache/hadoop/syscall"/>
<mkdir dir="${build.native}/src/org/apache/hadoop/util"/>
@@ -559,8 +568,20 @@
<env key="OS_ARCH" value="${os.arch}"/>
<env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
<env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
- <arg line="${native.src.dir}/configure LDFLAGS='-L${basedir}/nativelib/lzma' CPPFLAGS='-I${basedir}/nativelib/lzma'"/>
- </exec>
+ <arg line="${native.src.dir}/configure
+ LDFLAGS='-L${snappy.lib} -L${basedir}/nativelib/lzma -lrt'
+ CPPFLAGS='-I${snappy.include} -I${basedir}/nativelib/lzma'"/>
+ </exec>
+
+ <javah
+ classpath="${build.classes}"
+ destdir="${build.native}/src/org/apache/hadoop/io/compress/snappy"
+ force="yes"
+ verbose="yes"
+ >
+ <class name="org.apache.hadoop.io.compress.snappy.SnappyCompressor"/>
+ <class name="org.apache.hadoop.io.compress.snappy.SnappyDecompressor"/>
+ </javah>
<exec dir="${build.native}" executable="${make.cmd}" failonerror="true">
<env key="OS_NAME" value="${os.name}"/>
@@ -569,8 +590,16 @@
<env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
</exec>
- <exec dir="${build.native}" executable="sh" failonerror="true">
- <arg line="${build.native}/libtool --mode=install cp ${build.native}/lib/libhadoop.la ${build.native}/lib"/>
+ <delete>
+ <fileset dir="${build.native}/lib" includes="libsnappy.*"/>
+ </delete>
+ <copy todir="${build.native}/lib">
+ <fileset dir="${snappy.lib}" includes="libsnappy.*"/>
+ <fileset dir="${build.native}/src/org/apache/hadoop/io/compress/snappy/.libs/" includes="libhadoopsnappy.*"/>
+ </copy>
+
+ <exec dir="${build.native}" executable="sh" failonerror="true">
+ <arg line="${build.native}/libtool --mode=install cp ${build.native}/lib/libhadoop.la ${build.native}/lib"/>
</exec>
<delete>
<fileset dir="${build.native}/lib" includes="liblzma.so*"/>
@@ -763,6 +792,7 @@
<mkdir dir="${test.debug.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/mapred/testscript.txt" todir="${test.debug.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/mapred/test.txt" todir="${test.cache.data}"/>
+ <copy file="${test.src.dir}/org/apache/hadoop/io/compress/snappy/testsnappy.txt" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/mapred/test.jar" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/mapred/test.zip" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/mapred/test.tar" todir="${test.cache.data}"/>
@@ -842,7 +872,7 @@
<sysproperty key="test.build.extraconf" value="${test.build.extraconf}" />
<sysproperty key="hadoop.policy.file" value="hadoop-policy.xml"/>
<sysproperty key="java.library.path"
- value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
+ value="${build.native}/lib:${lib.dir}/native/${build.platform}:${snappy.lib}"/>
<sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
<sysproperty key="user.home" value="${test.user.home}"/>
<env key="LD_LIBRARY_PATH" value="${build.native}/lib${path.separator}${env.LD_LIBRARY_PATH}"/>
@@ -905,7 +935,7 @@
</subant>
</target>
- <target name="test" depends="clean, compile" description="Run core, contrib unit tests">
+ <target name="test" depends="clean, compile, compile-native" description="Run core, contrib unit tests">
<subant target="test-core" failonerror="false">
<fileset file="build.xml"/>
</subant>
@@ -1214,6 +1244,8 @@
<env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
<env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
<env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+ <env key="BUNDLE_SNAPPY_LIB" value="${bundle.snappy}"/>
+ <env key="SNAPPY_LIB_DIR" value="${snappy.prefix}/lib"/>
<arg line="${native.src.dir}/packageNativeHadoop.sh"/>
</exec>
@@ -1306,6 +1338,17 @@
</macro_tar>
</target>
+ <target name="superjar" depends="bin-package">
+ <jar jarfile="${build.dir}/${final.name}-super.jar">
+ <zipgroupfileset dir="lib" includes="*jar"/>
+ <zipgroupfileset dir="build" includes="*jar"/>
+ <zipgroupfileset dir="build/contrib/corona" includes="*jar"/>
+ <zipgroupfileset dir="build/contrib/corona/lib" includes="*jar"/>
+ <zipgroupfileset dir="build/ivy/lib/Hadoop/common/" includes="*jar"/>
+ <zipgroupfileset dir="build/ivy/lib/corona/common/" includes="*jar"/>
+ </jar>
+ </target>
+
<target name="bin-package" depends="compile, jar, examples, tools-jar, jar-test, ant-tasks, package-librecordio"
description="assembles artifacts for binary target">
<mkdir dir="${dist.dir}"/>
@@ -1327,6 +1370,8 @@
<env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
<env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
<env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+ <env key="BUNDLE_SNAPPY_LIB" value="${bundle.snappy}"/>
+ <env key="SNAPPY_LIB_DIR" value="${snappy.prefix}/lib"/>
<arg line="${native.src.dir}/packageNativeHadoop.sh"/>
</exec>
@@ -2013,8 +2058,8 @@
</target>
<!-- end of task-controller target -->
- <target name="print-version">
- <echo message="${version}" />
+ <target name="print-version">
+ <echo message="${version}" />
</target>
</project>
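The `<javah>` task added for the Snappy bindings corresponds roughly to the following command-line invocation (a sketch, shown with the Ant property placeholders left in; the Ant task is what the build actually runs). It generates the JNI headers that the native Snappy compressor/decompressor code compiles against:

```bash
# ${build.classes} and ${build.native} stand for the Ant properties used above.
javah -classpath "${build.classes}" \
      -d "${build.native}/src/org/apache/hadoop/io/compress/snappy" \
      -force -verbose \
      org.apache.hadoop.io.compress.snappy.SnappyCompressor \
      org.apache.hadoop.io.compress.snappy.SnappyDecompressor
```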
108 conf/hadoop-env.sh
@@ -1,68 +1,74 @@
-# Set Hadoop-specific environment variables here.
+# This if statement ensures that this file will be sourced only once
+if [[ "$IS_HADOOP_ENV_ALREADY_SOURCED" != "true" ]]; then
+ export IS_HADOOP_ENV_ALREADY_SOURCED="true";
-# The only required environment variable is JAVA_HOME. All others are
-# optional. When running a distributed configuration it is best to
-# set JAVA_HOME in this file, so that it is correctly defined on
-# remote nodes.
+ # Set Hadoop-specific environment variables here.
-# The java implementation to use. Required.
-# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+ # The only required environment variable is JAVA_HOME. All others are
+ # optional. When running a distributed configuration it is best to
+ # set JAVA_HOME in this file, so that it is correctly defined on
+ # remote nodes.
-# Extra Java CLASSPATH elements. Optional.
-# export HADOOP_CLASSPATH=
+ # The java implementation to use. Required.
+ # export JAVA_HOME=/usr/lib/j2sdk1.5-sun
-# The maximum amount of heap to use, in MB. Default is 1000.
-# export HADOOP_HEAPSIZE=2000
+ # Extra Java CLASSPATH elements. Optional.
+ # export HADOOP_CLASSPATH=
-# Extra Java runtime options. Empty by default.
-# export HADOOP_OPTS=-server
+ # The maximum amount of heap to use, in MB. Default is 1000.
+ # export HADOOP_HEAPSIZE=2000
-# Command specific options appended to HADOOP_OPTS when specified
-export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
-export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
-export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
-export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
-export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
-export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
-export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+ # Extra Java runtime options. Empty by default.
+ # export HADOOP_OPTS=-server
-# The only user who can start hadoop daemons.
-# If this is not set, any user can start hadoop daemons.
-export HADOOP_USERNAME="hadoop"
+ # Command specific options appended to HADOOP_OPTS when specified
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+ export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+ export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+ export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+ export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+ export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
-# Java Runtime garbage collection options to pass to all Hadoop
-# servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
-# with a colon ; to which the dynamically generated gc log filename will
-# be appended to. The below defaults work for the Sun JVM, for example
-# in IBM GC, use '-Xverbosegclog:'.
-#export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
+ # The only user who can start hadoop daemons.
+ # If this is not set, any user can start hadoop daemons.
+ export HADOOP_USERNAME="hadoop"
-# export HADOOP_TASKTRACKER_OPTS=
-# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
-# export HADOOP_CLIENT_OPTS
+ # Java Runtime garbage collection options to pass to all Hadoop
+ # servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
+ # with a colon ; to which the dynamically generated gc log filename will
+ # be appended to. The below defaults work for the Sun JVM, for example
+ # in IBM GC, use '-Xverbosegclog:'.
+ #export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
-# Extra ssh options. Empty by default.
-# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+ # export HADOOP_TASKTRACKER_OPTS=
+ # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+ # export HADOOP_CLIENT_OPTS
-# Where log files are stored. $HADOOP_HOME/logs by default.
-# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+ # Extra ssh options. Empty by default.
+ # export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
-# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
-# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+ # Where log files are stored. $HADOOP_HOME/logs by default.
+ # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
-# host:path where hadoop code should be rsync'd from. Unset by default.
-# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+ # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+ # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
-# Seconds to sleep between slave commands. Unset by default. This
-# can be useful in large clusters, where, e.g., slave rsyncs can
-# otherwise arrive faster than the master can service them.
-# export HADOOP_SLAVE_SLEEP=0.1
+ # host:path where hadoop code should be rsync'd from. Unset by default.
+ # export HADOOP_MASTER=master:/home/$USER/src/hadoop
-# The directory where pid files are stored. /tmp by default.
-# export HADOOP_PID_DIR=/var/hadoop/pids
+ # Seconds to sleep between slave commands. Unset by default. This
+ # can be useful in large clusters, where, e.g., slave rsyncs can
+ # otherwise arrive faster than the master can service them.
+ # export HADOOP_SLAVE_SLEEP=0.1
-# A string representing this instance of hadoop. $USER by default.
-# export HADOOP_IDENT_STRING=$USER
+ # The directory where pid files are stored. /tmp by default.
+ # export HADOOP_PID_DIR=/var/hadoop/pids
-# The scheduling priority for daemon processes. See 'man nice'.
-# export HADOOP_NICENESS=10
+ # A string representing this instance of hadoop. $USER by default.
+ # export HADOOP_IDENT_STRING=$USER
+
+ # The scheduling priority for daemon processes. See 'man nice'.
+ # export HADOOP_NICENESS=10
+
+fi
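Stripped of the individual settings, the new wrapper is just the standard bash include-guard idiom, which makes repeated sourcing of hadoop-env.sh harmless:

```bash
# Source this file any number of times; the body is evaluated only on the first pass.
if [[ "$IS_HADOOP_ENV_ALREADY_SOURCED" != "true" ]]; then
  export IS_HADOOP_ENV_ALREADY_SOURCED="true"
  # ... environment exports go here ...
fi
```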
108 conf/hadoop-env.sh.template
@@ -1,68 +1,74 @@
-# Set Hadoop-specific environment variables here.
+# This if statement ensures that this file will be sourced only once
+if [[ "$IS_HADOOP_ENV_ALREADY_SOURCED" != "true" ]]; then
+ export IS_HADOOP_ENV_ALREADY_SOURCED="true";
-# The only required environment variable is JAVA_HOME. All others are
-# optional. When running a distributed configuration it is best to
-# set JAVA_HOME in this file, so that it is correctly defined on
-# remote nodes.
+ # Set Hadoop-specific environment variables here.
-# The java implementation to use. Required.
-# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+ # The only required environment variable is JAVA_HOME. All others are
+ # optional. When running a distributed configuration it is best to
+ # set JAVA_HOME in this file, so that it is correctly defined on
+ # remote nodes.
-# Extra Java CLASSPATH elements. Optional.
-# export HADOOP_CLASSPATH=
+ # The java implementation to use. Required.
+ # export JAVA_HOME=/usr/lib/j2sdk1.5-sun
-# The maximum amount of heap to use, in MB. Default is 1000.
-# export HADOOP_HEAPSIZE=2000
+ # Extra Java CLASSPATH elements. Optional.
+ # export HADOOP_CLASSPATH=
-# Extra Java runtime options. Empty by default.
-# export HADOOP_OPTS=-server
+ # The maximum amount of heap to use, in MB. Default is 1000.
+ # export HADOOP_HEAPSIZE=2000
-# Command specific options appended to HADOOP_OPTS when specified
-export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
-export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
-export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
-export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
-export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
-export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
-export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+ # Extra Java runtime options. Empty by default.
+ # export HADOOP_OPTS=-server
-# The only user who can start hadoop daemons.
-# If this is not set, any user can start hadoop daemons.
-export HADOOP_USERNAME="hadoop"
+ # Command specific options appended to HADOOP_OPTS when specified
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+ export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+ export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+ export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+ export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+ export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
-# Java Runtime garbage collection options to pass to all Hadoop
-# servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
-# with a colon ; to which the dynamically generated gc log filename will
-# be appended to. The below defaults work for the Sun JVM, for example
-# in IBM GC, use '-Xverbosegclog:'.
-#export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
+ # The only user who can start hadoop daemons.
+ # If this is not set, any user can start hadoop daemons.
+ export HADOOP_USERNAME="hadoop"
-# export HADOOP_TASKTRACKER_OPTS=
-# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
-# export HADOOP_CLIENT_OPTS
+ # Java Runtime garbage collection options to pass to all Hadoop
+ # servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
+ # with a colon ; to which the dynamically generated gc log filename will
+ # be appended to. The below defaults work for the Sun JVM, for example
+ # in IBM GC, use '-Xverbosegclog:'.
+ #export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
-# Extra ssh options. Empty by default.
-# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+ # export HADOOP_TASKTRACKER_OPTS=
+ # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+ # export HADOOP_CLIENT_OPTS
-# Where log files are stored. $HADOOP_HOME/logs by default.
-# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+ # Extra ssh options. Empty by default.
+ # export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
-# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
-# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+ # Where log files are stored. $HADOOP_HOME/logs by default.
+ # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
-# host:path where hadoop code should be rsync'd from. Unset by default.
-# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+ # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+ # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
-# Seconds to sleep between slave commands. Unset by default. This
-# can be useful in large clusters, where, e.g., slave rsyncs can
-# otherwise arrive faster than the master can service them.
-# export HADOOP_SLAVE_SLEEP=0.1
+ # host:path where hadoop code should be rsync'd from. Unset by default.
+ # export HADOOP_MASTER=master:/home/$USER/src/hadoop
-# The directory where pid files are stored. /tmp by default.
-# export HADOOP_PID_DIR=/var/hadoop/pids
+ # Seconds to sleep between slave commands. Unset by default. This
+ # can be useful in large clusters, where, e.g., slave rsyncs can
+ # otherwise arrive faster than the master can service them.
+ # export HADOOP_SLAVE_SLEEP=0.1
-# A string representing this instance of hadoop. $USER by default.
-# export HADOOP_IDENT_STRING=$USER
+ # The directory where pid files are stored. /tmp by default.
+ # export HADOOP_PID_DIR=/var/hadoop/pids
-# The scheduling priority for daemon processes. See 'man nice'.
-# export HADOOP_NICENESS=10
+ # A string representing this instance of hadoop. $USER by default.
+ # export HADOOP_IDENT_STRING=$USER
+
+ # The scheduling priority for daemon processes. See 'man nice'.
+ # export HADOOP_NICENESS=10
+
+fi
2  conf/log4j.properties
@@ -34,7 +34,7 @@ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Pattern format: Date LogLevel LoggerName LogMessage
-log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c{1}: %m%n
# Debugging Pattern format
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
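The `%c{n}` conversion keeps only the last `n` components of the logger name, so this change shortens the logger column; for a logger named `org.apache.hadoop.hdfs.server.namenode.FSNamesystem` (name chosen for illustration) the two patterns render as:

```
%c{2}:  2012-08-20 12:00:00,000 INFO namenode.FSNamesystem: ...
%c{1}:  2012-08-20 12:00:00,000 INFO FSNamesystem: ...
```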
2  conf/log4j.properties.scribeappender
@@ -27,7 +27,7 @@ log4j.appender.DRFA.DatePattern=.yyyy-MM-dd-HH
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Pattern format: Date LogLevel LoggerName LogMessage
-log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c{1}: %m%n
# Debugging Pattern format
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
0  edit_generated_pom.py 100755 → 100644
File mode changed
213 hdfs-autoconf/README.md
@@ -0,0 +1,213 @@
+What is this?
+=============
+
+This is an autoconfigurator and autolauncher for a local HDFS cluster.
+It is mainly intended for developer use, and it provides you with a bunch of
+scripts for setting everything up in a minute.. or maybe two.
+Enjoy!
+
+DISCLAIMER: The scripts were written and tested on a GNU system and rely
+on GNU tools. At least two of them (`sed` & `readlink`) are known
+to be incompatible with their BSD implementations.
+
+
+
+STARTING CLUSTER
+================
+
+1. Make sure you have a zookeeper quorum started somewhere and that the file
+   `config-meta/avatar-shared.sed` has a `zookeeper-quorum` entry that points
+   to the quorum. If not, you can start a local zookeeper via the
+   `zookeeper.sh start` command
+2. `./build.sh` - builds all sources needed to start the HDFS cluster
+3. `./avatar-format` - formats the cluster directories
+4. `./start-dev-cluster --count 3` - starts a local cluster with 3 datanodes
+   (a consolidated sketch of these steps follows below).
+
+[OPTIONAL] If you want to change any `core-site.xml` or `hdfs-site.xml`
+  properties, make the necessary changes in the `config-templates/core-site.xml.template` and
+  `config-templates/hdfs-site.xml.template` files. If you want to configure cluster
+  directories, please refer to the FAQ questions "Where do namenodes store their data?" and
+  "Where do datanodes store their data?".
+
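Put together, and assuming no external zookeeper quorum is available, the whole bring-up looks like this (script names as listed in the "Files overview" section below):

```bash
./zookeeper.sh start              # only needed when no quorum is configured
./build.sh                        # build HDFS sources
./avatar-format                   # format the namenode directories
./start-dev-cluster.sh --count 3  # bring up the cluster with 3 datanodes
```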
+
+
+F.A.Q
+=====
+
+Where do I find cluster log files?
+----------------------------------
+
+The logs directory is specified by the `$LOGS_DIRECTORY` variable, which defaults to
+`$HADOOP_VERSION/logs`.
+
+
+Where do namenodes store their data?
+------------------------------------
+
+1. The directory that is used as a local directory for the active namenode is
+ specified in the `./config-meta/avatar-zero.sed` file.
+2. Similarly, the local directory for the standby namenode is
+   specified in the `./config-meta/avatar-one.sed` file.
+3. The shared namenode directory is specified in the
+   `./config-meta/avatar-shared.sed` file.
+
+
+Where do datanodes store their data?
+------------------------------------
+
+Each datanode has a set of volumes, and the autoconf tool maps volumes
+to distinct local directories. These directories are specified in the
+datanode configuration file, which is only one line long and has the following
+entry:
+
+```
+s:{{DataNode-volumes}}:<path-to-volume-1-directory>[,<path-to-volume-2-directory>...]:g
+```
+
+When the cluster is started with the `./start-dev-cluster --count 5` command,
+each of the 5 datanodes is started with a configuration file produced from
+the `./config-meta/avatar-datanode.template` template. Consider the following
+template:
+
+```
+s:{{DataNode-volumes}}:/tmp/hadoop-datanode-XXX-vol0/,/tmp/hadoop-datanode-XXX-vol1/:g
+```
+
+This would mean that the first datanode has two volumes mapped to
+`/tmp/hadoop-datanode-1-vol0/` and `/tmp/hadoop-datanode-1-vol1/` directories, and the
+fourth one has `/tmp/hadoop-datanode-4-vol0/` and `/tmp/hadoop-datanode-4-vol1/`.
+That is because the "XXX" sequence in the `avatar-datanode.template` file is
+substituted with the sequential datanode number to provide it with unique
+directories on the local machine.
+
+
+What is the format of files in `config-meta` directory?
+-------------------------------------------------------
+
+These files are SED (Stream Editor) scripts. Though the full syntax of SED scripts
+is not concise, the autoconf tool uses only the `substitute` command.
+
+The substitution command basically looks like this:
+
+```
+s:cat:dog:g
+```
+
+This example replaces every 'cat' with 'dog'. The letter 's' stands for the
+'substitute' command, and the trailing 'g' is a flag that tells sed to replace
+every occurrence of 'cat'; otherwise only the first occurrence of 'cat'
+per line would be replaced.
+
+Any symbol can be used as the command delimiter. That said, the following are fully
+equivalent to the previous example:
+```
+ s_cat_dog_g
+ s%cat%dog%g
+ s/cat/dog/g
+```
+
+This feature can be used to avoid escaping inside sed scripts. Consider
+the following example:
+```
+ s:some-folder:/tmp/foo:g
+ s_URL_localhost:7777_g
+```
+
+
+How do I add a new datanode configuration file?
+---------------------------------------------
+
+1. Create a file with a name that matches the format 'avatar-datanode-*.sed'
+(the naming pattern of the datanode configuration files is specified by the
+`$DATANODE_CONFIG_FILES` variable in the `config.sh` file)
+
+2. Fill in the file with the following content
+```
+s:{{DataNode-volumes}}:<path-to-volume-1-directory>[,<path-to-volume-2-directory>...]:g
+```
+
+
+What is an example of datanode config file with multiple volumes?
+-----------------------------------------------------------------
+
+A datanode with two volumes, each residing in its own directory, is configured
+the following way:
+
+```
+s:{{DataNode-volumes}}:/tmp/mydatanode-volume-1/,/tmp/mydatanode-volume-2/:g
+```
+
+So the directories should be listed one after another, separated by a comma
+delimiter.
+NOTE: Make sure you do not put any spaces!
+
+
+What exactly does autoconf tool do?
+-----------------------------------
+
+Whenever the autoconf tool starts an HDFS instance, it performs the following
+sequence of actions (sketched concretely below):
+
+1. Picks template files from the `config-templates` directory
+2. Runs the `sed` scripts from the `config-meta` directory over them
+3. Puts the results of the sed execution into the `hadoop-0.20/bin` directory (the path
+   to the `hadoop-0.20` directory is specified via `$HADOOP_VERSION`)
+4. Launches the HDFS instance
+
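Conceptually, steps 1-3 amount to running sed with a `config-meta` script over each template and dropping the result into the deployment directory; a sketch for a single file (file names are illustrative):

```bash
source config.sh   # defines HADOOP_VERSION, among other variables
sed -f config-meta/avatar-zero.sed \
    config-templates/hdfs-site.xml.template \
    > "${HADOOP_VERSION}/bin/hdfs-site.xml"
```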
+
+PRO stuff: multiple hadoop checkouts
+------------------------------------
+
+To switch between multiple hadoop checkouts, just edit the `./config.sh` file,
+setting the `$HADOOP_VERSION` variable to the path of the checkout you would like.
+
+
+
+Files overview
+==============
+
+Client scripts
+--------------
+
+This is the list of scripts that are designed to be used by the user. For more
+information, you can refer to the source code of each script or just
+run it with the `--help` argument.
+
+* `./build.sh` - builds everything
+* `./avatar-format` - formats directories for avatar namenodes (both active and
+ standby)
+* `./avatar-zero-start` - starts active avatar
+* `./avatar-one-start` - starts standby avatar
+* `./avatar-datanode-start` - allows you to choose a config and start a datanode
+  instance configured according to it.
+* `./start-dev-cluster.sh` - starts all the nodes as daemons for the local cluster
+  instance. Zookeeper is absolutely necessary for the cluster to function, and
+  it is started and stopped automatically with the cluster
+* `./stop-dev-cluster.sh` - stops the instantiated developer cluster (simply by killing
+  all the processes with `avatar` in the name)
+* `./zookeeper.sh` - this script is used to start and stop local zookeeper
+
+
+Other directory files
+---------------------
+
+* `./config-meta` - the directory that contains all the options for the local
+ cluster
+ - `./config-meta/avatar-shared.sed` - configuration of shared directories, used by
+ both Active and Stand-by avatar nodes
+ - `./config-meta/avatar-zero.sed` - configuration of local directories for node zero
+ - `./config-meta/avatar-one.sed` - configuration of local directories for node one
+ - `./config-meta/avatar-datanode*.sed` - configuration files for datanodes, one file per
+ node.
+ - `./config-meta/avatar-datanode.template` - configuration file that is used
+ to automatically generate datanode configuration files. Read more about this
+ file in the FIXME
+* `./config-templates` - stores all the template files that the substitutions are run over.
+* `./launchpad` - stores generated scripts; should not be used
+  unless you _really_ know what you are doing.
+* `./scripts` - here you can find the scripts that do the dirty work
+* `./README.md` - a markdown README in the best GitHub tradition.
+* `./config.sh` - this file exports the `$HADOOP_VERSION` variable as well as a
+  couple of other variables. You might refer to this file often if you have
+  multiple hadoop checkouts
+
133 hdfs-autoconf/avatar-datanode-start
@@ -0,0 +1,133 @@
+#!/bin/bash
+
+# Usage: bash avatar-datanode-start [--conf configFile] [--daemon]
+set -e
+
+usage="USAGE
+ bash $(basename $0) [--help] [--format] [--conf configFile] [--daemon]
+
+DESCRIPTION
+ Starts an avatar datanode locally with one of the configurations. If
+ the --conf option is not specified, the script brings up a menu listing
+ all the datanode configuration files it finds and lets the user make a
+ choice.
+
+OPTIONS
+ --help - shows this help message
+ --format - forces datanode to format its directories before it starts. If this
+ option is not given, then datanode does not format directories unless
+ they do not exist
+ --conf - specifies which configuration to use for starting datanode.
+ --daemon - starts datanode as a daemon process. Logs will go to
+ the directory specified by \$LOGS_DIRECTORY variable
+"
+
+if [[ ${PWD##*/} != "hdfs-autoconf" ]]; then
+ echo "The script should be launched from ./hdfs-autoconf directory. Exiting.."
+ exit 1
+fi
+
+if (( $# >= 1 )); then
+ if [[ "$1" == "--help" ]]; then
+ echo "$usage"
+ exit 0
+ fi
+fi
+
+format="false"
+if (( $# >= 1 )); then
+ if [[ "$1" == "--format" ]]; then
+ format="true"
+ shift;
+ fi
+fi
+
+if (( $# >= 2 )); then
+ if [[ "$1" == "--conf" ]]; then
+ shift;
+ datanodeConfig=$1;
+ shift;
+ fi
+fi
+
+daemon=false;
+if (( $# >= 1 )); then
+ if [[ "$1" == "--daemon" ]]; then
+ daemon=true;
+ shift;
+ fi
+fi
+
+if (( $# > 0 )); then
+ echo "$usage"
+ exit 1;
+fi
+
+source scripts/common.sh
+
+
+function showUserMenu {
+ echo -e "Searching for configurations ${cWHITE}$DATANODE_CONFIG_FILES${cRESET}..."
+ echo -e "Select config for this instance of datanode:"
+
+ counter=0;
+ for i in $(ls -1 $DATANODE_CONFIG_FILES); do
+ counter=$(expr $counter + 1);
+ echo -e " ${cWHITE}[$counter]${cRESET} $i"
+ done;
+
+ amount=$counter
+
+ if (( $amount == 0 )); then
+ fail "No configuration files found"
+ fi
+
+ read -p "
+ Which one to start (1-$amount): "
+ if [[ $REPLY == "" ]]; then
+ echo "Exiting...";
+ exit 0;
+ fi
+
+ if ! [[ $REPLY =~ ^[0-9]+$ ]]; then
+ fail "Command must be a number (no whitespaces!)"
+ fi
+ if !(( $REPLY > 0 && $REPLY <= $amount )); then
+ fail "Wrong command!"
+ fi
+
+ datanodeConfig=$(ls -1 $DATANODE_CONFIG_FILES | head -$REPLY | tail -1);
+}
+
+if [[ "$daemon" == "true" ]]; then
+ # HACK: we're removing *.pid files from logs directory so that hadoop
+ # daemon will allow us to start multiple instances
+ rm -f ${LOGS_DIRECTORY}/*.pid
+fi
+
+if [[ $datanodeConfig == "" ]]; then
+ showUserMenu
+fi
+
+
+# creating logs subdirectory from the name of config file
+datanodeLogsDirectory=${datanodeConfig##*/}
+datanodeLogsDirectory=${datanodeLogsDirectory%.*}
+export HADOOP_LOG_DIR=${LOGS_DIRECTORY}/$datanodeLogsDirectory
+./scripts/gen-datanode $datanodeConfig
+if [[ $format == "true" ]]; then
+ ./$LAUNCHPAD_DIR/dn-format --hard
+else
+ ./$LAUNCHPAD_DIR/dn-format --soft
+fi
+
+runArgs="";
+if [[ "$daemon" == "true" ]]; then
+ runArgs="$runArgs --daemon";
+fi
+
+./$LAUNCHPAD_DIR/run $runArgs
+
+# wait some time to make sure the running instance actually
+# read all the config files
+sleep 3
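Typical invocations, following the usage text above (the config path is one of the shipped example files):

```bash
# Interactive: pick one of the discovered configs from the menu
./avatar-datanode-start

# Non-interactive: format the volumes, use a specific config, run as a daemon
./avatar-datanode-start --format --conf config-meta/avatar-datanode-1.sed --daemon
```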
65 hdfs-autoconf/avatar-format
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+set -e
+
+usage="USAGE
+ bash $(basename $0) [--help]
+
+DESCRIPTION
+ Formats directories that are used for both Active and Standby namenodes.
+
+OPTIONS
+ --help - show this help message
+"
+
+if [[ ${PWD##*/} != "hdfs-autoconf" ]]; then
+ echo "The script should be launched from ./hdfs-autoconf directory. Exiting.."
+ exit 1
+fi
+
+if (( $# >= 1 )); then
+ if [[ $1 == "--help" ]]; then
+ echo "$usage";
+ exit 0;
+ fi
+fi
+
+if (( $# > 0 )); then
+ echo "$usage";
+ exit 1;
+fi
+
+source scripts/common.sh
+
+# populate config
+./scripts/gen-avatar zero
+
+# creating directory formatters
+LOCAL_DIR_FORMATTER="$TEMPLATES_DIR/format-avatarnode-local-dir.sh.template"
+SHARED_DIR_FORMATTER="$TEMPLATES_DIR/format-avatarnode-shared-dir.sh.template"
+
+AVATAR_LOCAL_ZERO="$LAUNCHPAD_DIR/avatar-zero-local-dir.sh"
+cp $LOCAL_DIR_FORMATTER $AVATAR_LOCAL_ZERO
+patch $AVATAR_LOCAL_ZERO $METACONF_DIR/avatar-zero.sed
+
+AVATAR_LOCAL_ONE="$LAUNCHPAD_DIR/avatar-one-local-dir.sh"
+cp $LOCAL_DIR_FORMATTER $AVATAR_LOCAL_ONE
+patch $AVATAR_LOCAL_ONE $METACONF_DIR/avatar-one.sed
+
+AVATAR_SHARED="$LAUNCHPAD_DIR/avatar-shared-dir.sh"
+cp $SHARED_DIR_FORMATTER $AVATAR_SHARED
+patch $AVATAR_SHARED $METACONF_DIR/avatar-shared.sed
+
+echo "Creating avatar directories"
+bash $AVATAR_LOCAL_ZERO
+bash $AVATAR_LOCAL_ONE
+bash $AVATAR_SHARED
+
+
+echo "Formatting avatar..."
+source config.sh
+cd ${HADOOP_VERSION}/bin
+./hadoop avatarzk -updateZK -zero -force
+./hadoop avatarnode -format
+echo -e "${cGREEN}Done.${cRESET}"
+
61 hdfs-autoconf/avatar-one-start
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Usage: bash avatar-one-start [--daemon]
+
+set -e
+usage="USAGE
+ bash $(basename $0) [--help] [--daemon]
+
+DESCRIPTION
+ Starts an avatar namenode locally, which is in standby mode by default.
+
+OPTIONS
+ --help - shows this help message
+ --daemon - starts avatar as a daemon process. Logs will go to
+ the directory specified by \$LOGS_DIRECTORY variable
+"
+
+if [[ ${PWD##*/} != "hdfs-autoconf" ]]; then
+ echo "The script should be launched from ./hdfs-autoconf directory. Exiting.."
+ exit 1
+fi
+
+if (( $# >= 1)); then
+ if [[ $1 == "--help" ]]; then
+ echo "$usage";
+ exit 0;
+ fi
+fi
+
+daemon="false";
+if (( $# >= 1 )); then
+ if [[ $1 == "--daemon" ]]; then
+ daemon="true"
+ shift;
+ fi;
+fi
+
+if (( $# > 0 )); then
+ echo "$usage";
+ exit 1;
+fi
+
+source config.sh
+
+runArgs="";
+if [[ "$daemon" == "true" ]]; then
+  # HACK: after every launch we should remove the *.pid files so that
+  # `hadoop-daemon.sh` (which is actually called under the hood)
+  # won't complain about instances that are already started
+ rm -f ${LOGS_DIRECTORY}/*.pid
+ runArgs="--daemon";
+fi
+
+export HADOOP_LOG_DIR=${LOGS_DIRECTORY}/avatar-one-logs
+
+./scripts/gen-avatar one
+$LAUNCHPAD_DIR/run $runArgs
+
+# wait a few seconds to make sure the running instance has actually
+# read all the config files
+sleep 3
View
67 hdfs-autoconf/avatar-zero-start
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Usage: bash avatar-zero-start [--daemon]
+set -e
+
+usage="USAGE
+ bash $(basename $0) [--help] [--daemon]
+
+DESCRIPTION
+  Starts an avatar namenode locally, which is active by default.
+
+OPTIONS
+ --help - shows this help message
+ --daemon - starts avatar as a daemon process. Logs will go to
+ the directory specified by \$LOGS_DIRECTORY variable
+"
+
+if [[ ${PWD##*/} != "hdfs-autoconf" ]]; then
+  echo "The script should be launched from the ./hdfs-autoconf directory. Exiting..."
+ exit 1
+fi
+
+if (( $# >= 1)); then
+ if [[ $1 == "--help" ]]; then
+ echo "$usage";
+ exit 0;
+ fi
+fi
+
+daemon="false";
+if (( $# >= 1 )); then
+ if [[ $1 == "--daemon" ]]; then
+ daemon="true";
+ shift;
+ fi
+fi
+
+if (( $# > 0 )); then
+ echo "$usage"
+ exit 1
+fi
+
+source config.sh
+
+runArgs=""
+if [[ "$daemon" == "true" ]]; then
+  # HACK: after every launch we should remove the *.pid files so that
+  # `hadoop-daemon.sh` (which is actually called under the hood)
+  # won't complain about instances that are already started
+ rm -f ${LOGS_DIRECTORY}/*.pid
+
+ runArgs="$runArgs --daemon"
+fi
+
+./scripts/gen-avatar zero
+
+cd ${HADOOP_VERSION}/bin
+./hadoop avatarzk -updateZK -zero -force
+cd -
+
+export HADOOP_LOG_DIR=${LOGS_DIRECTORY}/avatar-zero-logs
+
+$LAUNCHPAD_DIR/run $runArgs
+
+# wait a few seconds to make sure the running instance has actually
+# read all the config files
+sleep 3
View
48 hdfs-autoconf/build.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -e
+
+usage="USAGE
+ $(basename $0) [--help] [--fast]
+
+DESCRIPTION
+ Builds HDFS from sources.
+
+OPTIONS
+ --help - shows this help
+  --fast - EXPERIMENTAL option; performs a partial build roughly 3 times
+       faster than the default full build.
+"
+
+if (( $# >= 1 )); then
+ if [[ $1 == "--help" ]]; then
+ echo "$usage";
+ exit 0;
+ fi
+fi
+
+compile="full"
+if (( $# >= 1 )); then
+ if [[ $1 == "--fast" ]]; then
+ compile="fast"
+ shift
+ fi
+fi
+
+if (( $# > 0 )); then
+ echo "$usage"
+ exit 1
+fi
+
+source config.sh
+
+cd ${HADOOP_VERSION};
+
+if [[ $compile == "full" ]]; then
+ ant clean compile
+elif [[ $compile == "fast" ]]; then
+ ant clean compile-core
+ cd src/contrib/highavailability
+ ant clean compile
+fi
+
+
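Typical invocations, run from the hdfs-autoconf directory (a usage sketch; as shown above, the --fast path only rebuilds compile-core plus the highavailability contrib):

    bash build.sh          # full 'ant clean compile'
    bash build.sh --fast   # experimental partial rebuild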
View
2  hdfs-autoconf/config-meta/avatar-datanode-1.sed
@@ -0,0 +1,2 @@
+# datanode volumes: list without spaces, comma-delimited
+s:{{DataNode-volumes}}:/tmp/hadoop-datanode-0-vol0/,/tmp/hadoop-datanode-0-vol1/:g
View
2  hdfs-autoconf/config-meta/avatar-datanode-2.sed
@@ -0,0 +1,2 @@
+# datanode config
+s:{{DataNode-volumes}}:/tmp/hadoop-datanode-1-vol0/,/tmp/hadoop-datanode-1-vol1/:g
View
6 hdfs-autoconf/config-meta/avatar-datanode.template
@@ -0,0 +1,6 @@
+# This file is used as a template for generating datanode config files
+# automatically. The sequential number of a datanode instance will be
+# substituted for "XXX". This way you can specify the layout and number
+# of volumes for the automatically generated datanode configuration files
+#
+s:{{DataNode-volumes}}:/tmp/hadoop-datanode-XXX-vol0/,/tmp/hadoop-datanode-XXX-vol1/:g
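A minimal sketch of how the XXX substitution could be applied to this template; the actual generation is performed by scripts/gen-datanode, whose exact logic may differ:

    instance=3   # hypothetical datanode number
    sed "s/XXX/${instance}/g" config-meta/avatar-datanode.template \
        > "config-meta/avatar-datanode-${instance}.sed"

The result has the same shape as the avatar-datanode-1.sed and avatar-datanode-2.sed files above.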
View
5 hdfs-autoconf/config-meta/avatar-one.sed
@@ -0,0 +1,5 @@
+# local avatar 1 config
+s:{{NameNode-local}}:/tmp/hadoop-avatar-1-local/:g
+s:{{NameNode-local-fsimage}}:/tmp/hadoop-avatar-1-local/fsimage/:g
+s:{{NameNode-local-fsedits}}:/tmp/hadoop-avatar-1-local/fsedits/:g
+
View
10 hdfs-autoconf/config-meta/avatar-shared.sed
@@ -0,0 +1,10 @@
+# setting up shared avatar directories
+# all these paths will be created under the /tmp directory
+s:{{NameNode-shared}}:/tmp/hadoop-avatar-shared/:g
+s:{{NameNode-shared-fsimage-0}}:/tmp/hadoop-avatar-shared/fsimage-zero/:g
+s:{{NameNode-shared-fsedits-0}}:/tmp/hadoop-avatar-shared/fsedits-zero/:g
+s:{{NameNode-shared-fsimage-1}}:/tmp/hadoop-avatar-shared/fsimage-one/:g
+s:{{NameNode-shared-fsedits-1}}:/tmp/hadoop-avatar-shared/fsedits-one/:g
+
+# an underscore may be used as the sed delimiter as well
+s_{{zookeeper-quorum}}_localhost_g
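The underscore delimiter matters once the quorum value contains colons (host:port pairs), which would break a colon-delimited rule. A small illustration with a hypothetical two-node quorum:

    echo "quorum={{zookeeper-quorum}}" | sed 's_{{zookeeper-quorum}}_zk1:2181,zk2:2181_g'
    # -> quorum=zk1:2181,zk2:2181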
View
5 hdfs-autoconf/config-meta/avatar-zero.sed
@@ -0,0 +1,5 @@
+# local avatar 0 config
+s:{{NameNode-local}}:/tmp/hadoop-avatar-0-local/:g
+s:{{NameNode-local-fsimage}}:/tmp/hadoop-avatar-0-local/fsimage/:g
+s:{{NameNode-local-fsedits}}:/tmp/hadoop-avatar-0-local/fsedits/:g
+
View
115 hdfs-autoconf/config-templates/avatar-site.xml.template
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+ <name>dfs.http.address0</name>
+ <value>localhost:50070</value>
+ <description>
+ The address and the base port where the dfs namenode web ui will listen on.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.http.address1</name>
+ <value>localhost:50080</value>
+ <description>
+ The address and the base port where the dfs namenode web ui will listen on.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.name.dir</name>
+ <value>{{NameNode-local-fsimage}}</value>
+ <description>Determines where on the local filesystem the DFS name node
+ should store the name table(fsimage). If this is a comma-delimited list
+ of directories then the name table is replicated in all of the
+ directories, for redundancy. </description>
+</property>
+
+<property>
+ <name>dfs.name.edits.dir</name>
+ <value>{{NameNode-local-fsedits}}</value>
+ <description>Determines where on the local filesystem the DFS name node
+ should store the transaction (edits) file. If this is a comma-delimited list of directories then the transaction file is replicated in all of the
+ directories, for redundancy. Default value is same as dfs.name.dir
+ </description>
+</property>
+
+<property>
+ <name>dfs.name.dir.shared0</name>
+ <value>{{NameNode-shared-fsimage-0}}</value>
+ <description>Determines where on the filer the AvatarNode
+ should store the name table(fsimage).
+ </description>
+</property>
+
+<property>
+ <name>dfs.name.dir.shared1</name>
+ <value>{{NameNode-shared-fsimage-1}}</value>
+ <description>Determines where on the filer the other instance of the AvatarNode
+ should store the name table(fsimage).
+ </description>
+</property>
+
+<property>
+ <name>dfs.name.edits.dir.shared0</name>
+ <value>{{NameNode-shared-fsedits-0}}</value>
+ <description>Determines where on the filer the AvatarNode
+ should store the transaction (edits) file. If this is a comma-delimited list of directories then the transaction file is replicated in all of the
+ directories, for redundancy. Default value is same as dfs.name.dir
+ </description>
+</property>
+
+<property>
+ <name>dfs.name.edits.dir.shared1</name>
+ <value>{{NameNode-shared-fsedits-1}}</value>
+ <description>Determines where on the filer the other instance of the AvatarNode
+ should store the transaction (edits) file.
+ </description>
+</property>
+
+<property>
+ <name>fs.checkpoint.enabled</name>
+ <value>true</value>
+</property>
+
+<property>
+ <name>standby.image.copies.tokeep</name>
+ <value>5</value>
+ <description>The number of backup copies of the image
+ and fsedits to keep around.
+ </description>
+</property>
+
+<property>
+ <name>standby.image.days.tokeep</name>
+ <value>2</value>
+  <description>How old (in days) a backup image must be
+  before it gets deleted.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.dn-address0</name>
+ <value>localhost:9005</value>
+ <description>
+ The address and port to run the RPC server which will be processing
+ requests from datanodes in the cluster.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.dn-address1</name>
+ <value>localhost:9006</value>
+ <description>
+ The address and port to run the RPC server which will be processing
+ requests from datanodes in the cluster.
+ </description>
+</property>
+
+
+</configuration>
View
95 hdfs-autoconf/config-templates/core-site.xml.template
@@ -0,0 +1,95 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://localhost:9000</value>
+ <description>The name of the default file system. A URI whose
+ scheme and authority determine the FileSystem implementation. The
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
+ the FileSystem implementation class. The uri's authority is used to
+ determine the host, port, etc. for a filesystem.</description>
+</property>
+
+<property>
+ <name>fs.default.name0</name>
+ <value>hdfs://localhost:9000</value>
+ <description>The name of the default file system. A URI whose
+ scheme and authority determine the FileSystem implementation. The
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
+ the FileSystem implementation class. The uri's authority is used to
+ determine the host, port, etc. for a filesystem.</description>
+</property>
+
+<property>
+ <name>fs.default.name1</name>
+ <value>hdfs://localhost:9010</value>
+ <description>The name of the default file system. A URI whose
+ scheme and authority determine the FileSystem implementation. The
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
+ the FileSystem implementation class. The uri's authority is used to
+ determine the host, port, etc. for a filesystem.</description>
+</property>
+
+<property>
+ <name>fs.checkpoint.period</name>
+ <value>600</value>
+ <description>
+ The number of seconds between two periodic checkpoints
+ </description>
+</property>
+
+<property>
+ <name>fs.checkpoint.size</name>
+ <value>10000000</value>
+ <description>
+ Defines the size of the edits log file that forces an urgent checkpoint even
+ if the maximum checkpoint delay is not reached.
+ </description>
+</property>
+
+<property>
+ <name>fs.ha.zookeeper.quorum</name>
+ <value>{{zookeeper-quorum}}</value>
+ <description>The list of ZK servers DAFS will be connecting to</description>
+</property>
+
+<property>
+ <name>ipc.client.connect.max.retries</name>
+ <value>10</value>
+</property>
+
+<property>
+ <name>ipc.client.connect.timeout</name>
+ <value>5</value>
+</property>
+
+
+<property>
+ <name>fs.hdfs.impl</name>
+ <value>org.apache.hadoop.hdfs.DistributedAvatarFileSystem</value>
+</property>
+
+<property>
+ <name>fs.ha.zookeeper.cache</name>
+ <value>true</value>
+</property>
+
+<property>
+ <name>fs.ha.zookeeper.timeout</name>
+ <value>30000</value>
+ <description> Indicates the session timeout for a zookeeper client connection</description>
+</property>
+
+<property>
+ <name>fs.ha.retrywrites</name>
+ <value>true</value>
+ <description>retry writes or not</description>
+</property>
+
+</configuration>
+
View
53 hdfs-autoconf/config-templates/format-avatardatanode.sh.template
@@ -0,0 +1,53 @@
+#!/bin/bash
+set -e
+
+usage="USAGE
+ bash $(basename $0) [--help] [--soft]
+
+DESCRIPTION
+  Formats all the directories needed for every datanode volume.
+  If a volume directory already exists, it is recreated, which deletes
+  all the underlying data (this is also called HARD mode), unless the
+  --soft option is given
+
+OPTIONS
+ --help - shows this help message
+  --soft - does not recreate a directory if it already exists. This
+       option is used to preserve the datanode's data
+"
+
+if (( $# >= 1 )); then
+ if [[ "$1" == "--help" ]]; then
+ echo "$usage"
+ exit 0
+ fi
+fi
+
+soft="false";
+if (( $# >= 1 )); then
+ if [[ "$1" == "--soft" ]]; then
+ soft="true";
+ shift;
+ fi
+fi
+
+volumeDirs=$(echo {{DataNode-volumes}} | tr ',' '\n');
+echo "Volume dirs: $volumeDirs"
+
+if [[ "$soft" == "true" ]]; then
+  echo "Datanode is formatted in SOFT mode"
+ for i in $volumeDirs; do
+ if ! [[ -d $i ]]; then
+ mkdir $i;
+ fi
+ done;
+elif [[ "$soft" == "false" ]]; then
+  echo "Datanode is formatted in HARD mode"
+ for i in $volumeDirs; do
+ rm -rf $i;
+ mkdir $i;
+ done;
+else
+ echo "This is a bug. Local variable \$soft has a bad value of $soft"
+ exit 1
+fi
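For reference, once this template has been patched with, say, avatar-datanode-1.sed from above, the volume list is split like this:

    volumeDirs=$(echo "/tmp/hadoop-datanode-0-vol0/,/tmp/hadoop-datanode-0-vol1/" | tr ',' '\n')
    for dir in $volumeDirs; do
      echo "volume: $dir"   # one directory per line, as iterated by the loops above
    done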
View
6 hdfs-autoconf/config-templates/format-avatarnode-local-dir.sh.template
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+rm -rf {{NameNode-local}};
+mkdir -p {{NameNode-local-fsimage}};
+mkdir -p {{NameNode-local-fsedits}};
+
View
7 hdfs-autoconf/config-templates/format-avatarnode-shared-dir.sh.template
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+rm -rf {{NameNode-shared}};
+mkdir -p {{NameNode-shared-fsimage-0}};
+mkdir -p {{NameNode-shared-fsedits-0}};
+mkdir -p {{NameNode-shared-fsimage-1}};
+mkdir -p {{NameNode-shared-fsedits-1}};
View
71 hdfs-autoconf/config-templates/hadoop-env-avatar-one.sh
@@ -0,0 +1,71 @@
+if [[ "$IS_HADOOP_ENV_ALREADY_SOURCED" != "true" ]]; then
+ export IS_HADOOP_ENV_ALREADY_SOURCED="true"
+ # Set Hadoop-specific environment variables here.
+
+ # The only required environment variable is JAVA_HOME. All others are
+ # optional. When running a distributed configuration it is best to
+ # set JAVA_HOME in this file, so that it is correctly defined on
+ # remote nodes.
+
+ # The java implementation to use. Required.
+ # export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+ # Extra Java CLASSPATH elements. Optional.
+ export HADOOP_CLASSPATH=${HADOOP_TRUNK_MAIN}/VENDOR/hadoop-0.20/lib/
+
+ # The maximum amount of heap to use, in MB. Default is 1000.
+ # export HADOOP_HEAPSIZE=2000
+
+ # Extra Java runtime options. Empty by default.
+ # export HADOOP_OPTS=-server
+
+ # Command specific options appended to HADOOP_OPTS when specified
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+ export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+ export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+ export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+ export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+ export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+
+ # The only user who can start hadoop daemons.
+ # If this is not set, any user can start hadoop daemons.
+ # export HADOOP_USERNAME="hadoop"
+
+ # Java Runtime garbage collection options to pass to all Hadoop
+ # servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
+  # with a colon, to which the dynamically generated gc log filename will
+  # be appended. The defaults below work for the Sun JVM; for the IBM GC,
+  # for example, use '-Xverbosegclog:'.
+ #export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
+
+ # export HADOOP_TASKTRACKER_OPTS=
+ # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+ # export HADOOP_CLIENT_OPTS
+
+ # Extra ssh options. Empty by default.
+ # export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+ # Where log files are stored. $HADOOP_HOME/logs by default.
+ # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+ # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+ # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+ # host:path where hadoop code should be rsync'd from. Unset by default.
+ # export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+ # Seconds to sleep between slave commands. Unset by default. This
+ # can be useful in large clusters, where, e.g., slave rsyncs can
+ # otherwise arrive faster than the master can service them.
+ # export HADOOP_SLAVE_SLEEP=0.1
+
+ # The directory where pid files are stored. /tmp by default.
+ # export HADOOP_PID_DIR=/var/hadoop/pids
+
+ # A string representing this instance of hadoop. $USER by default.
+ # export HADOOP_IDENT_STRING=$USER
+
+ # The scheduling priority for daemon processes. See 'man nice'.
+ # export HADOOP_NICENESS=10
+fi
View
71 hdfs-autoconf/config-templates/hadoop-env-avatar-zero.sh
@@ -0,0 +1,71 @@
+if [[ "$IS_HADOOP_ENV_ALREADY_SOURCED" != "true" ]]; then
+ export IS_HADOOP_ENV_ALREADY_SOURCED="true"
+ # Set Hadoop-specific environment variables here.
+
+ # The only required environment variable is JAVA_HOME. All others are
+ # optional. When running a distributed configuration it is best to
+ # set JAVA_HOME in this file, so that it is correctly defined on
+ # remote nodes.
+
+ # The java implementation to use. Required.
+ # export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+ # Extra Java CLASSPATH elements. Optional.
+ #export HADOOP_CLASSPATH=${HADOOP_TRUNK_MAIN}/VENDOR/hadoop-0.20/lib/
+
+ # The maximum amount of heap to use, in MB. Default is 1000.
+ export HADOOP_HEAPSIZE=2000
+
+ # Extra Java runtime options. Empty by default.
+ # export HADOOP_OPTS=-server
+
+ # Command specific options appended to HADOOP_OPTS when specified
+ export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+ export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+ export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+ export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+ export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote -Xmx3g -Xms3g $HADOOP_NAMENODE_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=9070"
+ #export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+
+ # The only user who can start hadoop daemons.
+ # If this is not set, any user can start hadoop daemons.
+ #export HADOOP_USERNAME="hadoop"
+
+ # Java Runtime garbage collection options to pass to all Hadoop
+ # servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
+  # with a colon, to which the dynamically generated gc log filename will
+  # be appended. The defaults below work for the Sun JVM; for the IBM GC,
+  # for example, use '-Xverbosegclog:'.
+ #export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
+
+ # export HADOOP_TASKTRACKER_OPTS=
+ # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+ # export HADOOP_CLIENT_OPTS
+
+ # Extra ssh options. Empty by default.
+ # export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+ # Where log files are stored. $HADOOP_HOME/logs by default.
+ # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+ # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+ # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+ # host:path where hadoop code should be rsync'd from. Unset by default.
+ # export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+ # Seconds to sleep between slave commands. Unset by default. This
+ # can be useful in large clusters, where, e.g., slave rsyncs can
+ # otherwise arrive faster than the master can service them.
+ # export HADOOP_SLAVE_SLEEP=0.1
+
+ # The directory where pid files are stored. /tmp by default.
+ # export HADOOP_PID_DIR=/var/hadoop/pids
+
+ # A string representing this instance of hadoop. $USER by default.
+ # export HADOOP_IDENT_STRING=$USER
+
+ # The scheduling priority for daemon processes. See 'man nice'.
+ # export HADOOP_NICENESS=10
+fi
View
71 hdfs-autoconf/config-templates/hadoop-env-datanode.sh
@@ -0,0 +1,71 @@
+if [[ "$IS_HADOOP_ENV_ALREADY_SOURCED" != "true" ]]; then
+ export IS_HADOOP_ENV_ALREADY_SOURCED="true"
+ # Set Hadoop-specific environment variables here.
+
+ # The only required environment variable is JAVA_HOME. All others are
+ # optional. When running a distributed configuration it is best to
+ # set JAVA_HOME in this file, so that it is correctly defined on
+ # remote nodes.
+
+ # The java implementation to use. Required.
+ # export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+ # Extra Java CLASSPATH elements. Optional.
+ # export HADOOP_CLASSPATH=
+
+ # The maximum amount of heap to use, in MB. Default is 1000.
+ # export HADOOP_HEAPSIZE=2000
+
+ # Extra Java runtime options. Empty by default.
+ # export HADOOP_OPTS=-server
+
+ # Command specific options appended to HADOOP_OPTS when specified
+ export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+ export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+ export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+ export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+ export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+ export HADOOP_RAIDNODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_RAIDNODE_OPTS"
+ #export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote.port=8998 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false"
+
+ # The only user who can start hadoop daemons.
+ # If this is not set, any user can start hadoop daemons.
+ # export HADOOP_USERNAME="hadoop"
+
+ # Java Runtime garbage collection options to pass to all Hadoop
+ # servers (Namenode, Jobtracker, Datanode, Tasktracker). This must end
+  # with a colon, to which the dynamically generated gc log filename will
+  # be appended. The defaults below work for the Sun JVM; for the IBM GC,
+  # for example, use '-Xverbosegclog:'.
+ #export HADOOP_GC_LOG_OPTS="-XX:+PrintGCDateStamps -XX:+PrintGCDetails -Xloggc:"
+
+ # export HADOOP_TASKTRACKER_OPTS=
+ # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+ # export HADOOP_CLIENT_OPTS
+
+ # Extra ssh options. Empty by default.
+ # export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+ # Where log files are stored. $HADOOP_HOME/logs by default.
+ # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+ # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+ # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+ # host:path where hadoop code should be rsync'd from. Unset by default.
+ # export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+ # Seconds to sleep between slave commands. Unset by default. This
+ # can be useful in large clusters, where, e.g., slave rsyncs can
+ # otherwise arrive faster than the master can service them.
+ # export HADOOP_SLAVE_SLEEP=0.1
+
+ # The directory where pid files are stored. /tmp by default.
+ # export HADOOP_PID_DIR=/var/hadoop/pids
+
+ # A string representing this instance of hadoop. $USER by default.
+ # export HADOOP_IDENT_STRING=$USER
+
+ # The scheduling priority for daemon processes. See 'man nice'.
+ # export HADOOP_NICENESS=10
+fi
View
118 hdfs-autoconf/config-templates/hdfs-site.xml.template
@@ -0,0 +1,118 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+<name>dfs.replication</name>
+<value>1</value>
+</property>
+
+<property>
+ <name>dfs.http.address</name>
+ <value>127.0.0.1:50070</value>
+ <description>
+ The address and the base port where the dfs namenode web ui will listen on.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.secondary.http.address</name>
+ <value>0.0.0.0:0</value>
+ <description>
+ The secondary namenode http server address and port.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.blockreport.intervalMsec</name>
+ <value>300000</value>
+ <description>Determines block reporting interval in milliseconds.</description>
+</property>
+
+<property>
+ <name>dfs.fullblockreport.magnifier</name>
+ <value>2</value>
+ <description>
+ Determines the full block reporting interval, which is magnifier
+ times the delete block report interval.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.address</name>
+ <value>0.0.0.0:0</value>
+ <description>
+ The address where the datanode server will listen to.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.http.address</name>
+ <value>0.0.0.0:0</value>
+ <description>
+ The datanode http server address and port.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.ipc.address</name>
+ <value>0.0.0.0:0</value>
+ <description>
+ The datanode ipc server address and port.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.handler.count</name>
+ <value>3</value>
+ <description>The number of server threads for the datanode.</description>
+</property>
+
+<property>
+ <name>dfs.permissions</name>
+ <value>false</value>
+</property>
+
+<property>
+ <name>dfs.data.dir</name>
+ <value>{{DataNode-volumes}}</value>
+  <description>Determines where on the local filesystem a DFS data node
+ should store its blocks. If this is a comma-delimited
+ list of directories, then data will be stored in all named
+ directories, typically on different devices.
+ Directories that do not exist are ignored.
+ </description>
+</property>
+
+<property>
+ <name>dfs.block.invalidate.limit</name>
+ <value>100</value>
+</property>
+
+<property>
+ <name>dfs.safemode.extension</name>
+ <value>10000</value>
+ <description>
+ Determines extension of safe mode in milliseconds
+ after the threshold level is reached.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.dn-address</name>
+ <value>localhost:9015</value>
+ <description>
+ The address and port to run the RPC server which will be processing
+ requests from datanodes in the cluster.
+ </description>
+</property>
+
+</configuration>
View
12 hdfs-autoconf/config-templates/run-datanode.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+#Usage: bash $LAUNCHPAD_DIR/run.sh [--daemon]
+
+source config.sh
+
+cd ${HADOOP_VERSION}/bin
+if [[ $# -gt 0 && $1 == "--daemon" ]]; then
+ export HADOOP_PID_DIR="$LOGS_DIRECTORY" && ./hadoop-daemon.sh start avatardatanode
+else
+ ./hadoop avatardatanode
+fi
+
View
12 hdfs-autoconf/config-templates/run-one.template
@@ -0,0 +1,12 @@
+#!/bin/bash
+#Usage: bash $LAUNCHPAD_DIR/run.sh [--daemon]
+
+source config.sh
+
+cd $HADOOP_VERSION/bin
+if [[ $# -gt 0 && $1 == "--daemon" ]]; then
+ echo "daemon mode"
+ export HADOOP_PID_DIR="$LOGS_DIRECTORY" && ./hadoop-daemon.sh start avatarnode -one -standby;
+else
+ ./hadoop avatarnode -one -standby;
+fi
View
12 hdfs-autoconf/config-templates/run-zero.template
@@ -0,0 +1,12 @@
+#!/bin/bash
+#Usage: bash $LAUNCHPAD_DIR/run.sh [--daemon]
+
+source config.sh
+
+cd $HADOOP_VERSION/bin
+
+if [[ $# -gt 0 && $1 == "--daemon" ]]; then
+ export HADOOP_PID_DIR="$LOGS_DIRECTORY" && ./hadoop-daemon.sh start avatarnode -zero;
+else
+ ./hadoop avatarnode -zero;
+fi
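These run-* templates appear to be materialized as $LAUNCHPAD_DIR/run by the generator scripts (the start scripts above invoke $LAUNCHPAD_DIR/run), so a generated node can also be launched by hand, for example:

    bash ./launchpad/run            # run in the foreground
    bash ./launchpad/run --daemon   # run via hadoop-daemon.sh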
View
25 hdfs-autoconf/config-templates/zoo.cfg
@@ -0,0 +1,25 @@
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+# the directory where the snapshot is stored.
+# do not use /tmp for storage; /tmp here is just
+# for example's sake.
+dataDir=/tmp/zookeeper
+# the port at which the clients will connect
+clientPort=2181
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+#autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+#autopurge.purgeInterval=1
View
57 hdfs-autoconf/config.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# This script is sourced by every other script.
+
+# stop execution as soon as any simple command fails
+set -e
+
+# ==================================================
+# CONFIGURE BEFORE USE
+# ==================================================
+
+# This variable specifies the hadoop checkout: binaries will be run from the
+# ${HADOOP_VERSION}/bin directory, and configuration files are assumed to be
+# located in the ${HADOOP_VERSION}/conf directory.
+# HADOOP_VERSION=
+if [[ -z $HADOOP_VERSION ]]; then
+ HADOOP_VERSION=$(readlink -f ../)
+fi
+
+# This is the directory that will hold all the log files for different
+# instances.
+# NOTE: an absolute path must be specified here!
+if [[ -z $LOGS_DIRECTORY ]]; then
+ LOGS_DIRECTORY=$HADOOP_VERSION/logs
+fi
+
+# ===================================================
+# ===================================================
+
+
+METACONF_DIR="./config-meta"
+TEMPLATES_DIR="./config-templates"
+LAUNCHPAD_DIR="./launchpad"
+# This is the glob pattern used to locate the datanode configuration files
+DATANODE_CONFIG_FILES="$METACONF_DIR/avatar-datanode*.sed"
+# This is the file that will exist as long as the cluster is running.
+# Used by start-dev-cluster and stop-dev-cluster scripts
+CLUSTER_IS_RUNNING=$LOGS_DIRECTORY/cluster-is-running-now
+
+
+if ! [[ -d $METACONF_DIR ]]; then
+ echo "Cannot find $METACONF_DIR directory; check config.sh to correct the dir"
+ exit 1
+fi
+
+if ! [[ -d $TEMPLATES_DIR ]]; then
+ echo "Cannot find $TEMPLATES_DIR directory; check config.sh to correct the dir"
+ exit 1
+fi
+
+if ! [[ -d $LAUNCHPAD_DIR ]]; then
+ mkdir -p $LAUNCHPAD_DIR
+fi
+
+if [[ -z $ZOOKEEPER_PATH ]]; then
+ ZOOKEEPER_PATH="`pwd`/../../../VENDOR.zookeeper/fb-trunk/"
+fi
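Because config.sh only assigns these variables when they are empty, they can be overridden from the environment before calling any of the scripts, for example (paths are hypothetical):

    export HADOOP_VERSION=/home/user/hadoop-20
    export LOGS_DIRECTORY=/home/user/hadoop-20/logs
    export ZOOKEEPER_PATH=/home/user/zookeeper
    bash build.sh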
View
77 hdfs-autoconf/scripts/common.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+source config.sh
+
+# Colors!
+# How to use them? See example:
+# echo -e "See the real ${cRED}RED${cRESET} color"
+
+cBLACK='\E[0;30m'
+cRED='\E[0;31m'
+cGREEN='\E[0;32m'
+cYELLOW='\E[0;33m'
+cBLUE='\E[0;34m'
+cMAGENTA='\E[0;35m'
+cCYAN='\E[0;36m'
+cWHITE='\E[1;37m'
+cRESET='\E[00m'
+
+# print a message in red and exit with a failure status
+function fail {
+ echo -e "${cRED}$1${cRESET}"
+ exit 1
+}
+
+# Patches a template file with sed scripts. All changes
+# are made in place.
+#
+# Usage
+#   patch <template file> <sed script 1> [<sed script 2> ...]
+function patch {
+  if (( $# < 2 )); then
+    echo "usage: patch <template file> <sed 1> [<sed 2>...]"
+ exit 1
+ fi