This repository has been archived by the owner. It is now read-only.

Port YARN to branch-1.4

shivaram committed May 23, 2015
1 parent 74e884c commit decdb5a56400ddf56f0c37505adde8f323ccdc51
@@ -69,6 +69,7 @@
"hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),
"java_home": os.getenv("JAVA_HOME"),
"default_tachyon_mem": "%dMB" % tachyon_mb,
"system_ram_mb": "%d" % system_ram_mb,
"aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
"aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
}
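The new system_ram_mb template variable is filled in by the deploy script; how the value is computed is not shown in this hunk. A minimal sketch of one way to derive it on the instance, assuming /proc/meminfo is available (an assumption, not part of this commit):

# Hypothetical helper, not from this commit: total RAM in MB from /proc/meminfo.
# MemTotal is reported in kB, so integer-divide by 1024.
system_ram_mb=$(awk '/^MemTotal:/ {printf "%d", $2 / 1024}' /proc/meminfo)
echo "system_ram_mb=${system_ram_mb}"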
@@ -23,6 +23,17 @@ case "$HADOOP_MAJOR_VERSION" in
rm hadoop-*.tar.gz
mv hadoop-2.0.0-cdh4.2.0/ ephemeral-hdfs/
# Have single conf dir
rm -rf /root/ephemeral-hdfs/etc/hadoop/
ln -s /root/ephemeral-hdfs/conf /root/ephemeral-hdfs/etc/hadoop
;;
yarn)
wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.4.0.tar.gz
echo "Unpacking Hadoop"
tar xvzf hadoop-*.tar.gz > /tmp/spark-ec2_hadoop.log
rm hadoop-*.tar.gz
mv hadoop-2.4.0/ ephemeral-hdfs/
# Have single conf dir
rm -rf /root/ephemeral-hdfs/etc/hadoop/
ln -s /root/ephemeral-hdfs/conf /root/ephemeral-hdfs/etc/hadoop
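Hadoop 2.x distributions read their configuration from etc/hadoop by default, so the symlink above keeps a single conf directory for both layouts. An illustrative check that the link resolves as intended, using the same paths as this script:

# Illustrative sanity check: etc/hadoop should now resolve to the conf directory.
readlink -f /root/ephemeral-hdfs/etc/hadoop   # expected: /root/ephemeral-hdfs/conf
ls /root/ephemeral-hdfs/etc/hadoop/           # same contents as /root/ephemeral-hdfs/conf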
@@ -4,6 +4,10 @@
mkdir -p /mnt/ephemeral-hdfs/logs
mkdir -p /mnt/hadoop-logs
# Setup yarn logs, local dirs
mkdir -p /mnt/yarn-local
mkdir -p /mnt/yarn-logs
# Create Hadoop and HDFS directories in a given parent directory
# (for example /mnt, /mnt2, and so on)
function create_hadoop_dirs {
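The two new directories are the usual NodeManager scratch and container-log locations; the yarn-site.xml template that would point at them (via yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs) is not part of this hunk, so that wiring is an assumption here. An illustrative pre-flight check that the paths exist and are writable before the NodeManager starts:

# Illustrative check only, not from this commit.
for d in /mnt/yarn-local /mnt/yarn-logs; do
  mkdir -p "$d"
  [ -w "$d" ] || echo "WARNING: $d is not writable" >&2
done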
@@ -27,8 +27,23 @@ else
fi
echo "Starting ephemeral HDFS..."
# This is different depending on version. Simple hack: just try both.
$EPHEMERAL_HDFS/sbin/start-dfs.sh
$EPHEMERAL_HDFS/bin/start-dfs.sh
# This is different depending on version.
case "$HADOOP_MAJOR_VERSION" in
1)
$EPHEMERAL_HDFS/bin/start-dfs.sh
;;
2)
$EPHEMERAL_HDFS/sbin/start-dfs.sh
;;
yarn)
$EPHEMERAL_HDFS/sbin/start-dfs.sh
echo "Starting YARN"
$EPHEMERAL_HDFS/sbin/start-yarn.sh
;;
*)
echo "ERROR: Unknown Hadoop version"
return -1
esac
popd > /dev/null
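Once the yarn branch above has run, both the HDFS daemons and the YARN ResourceManager/NodeManagers should be up. A hedged way to confirm this from the master, reusing the script's $EPHEMERAL_HDFS variable (the exact report format varies by Hadoop version):

# Illustrative post-start checks, assuming $EPHEMERAL_HDFS is set as in this script.
$EPHEMERAL_HDFS/bin/hdfs dfsadmin -report | head -n 20   # live DataNodes
$EPHEMERAL_HDFS/bin/yarn node -list                      # registered NodeManagers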
@@ -11,6 +11,9 @@ case "$HADOOP_MAJOR_VERSION" in
rm mr1-*.tar.gz
mv hadoop-2.0.0-mr1-cdh4.2.0/ mapreduce/
;;
yarn)
echo "Nothing to initialize for MapReduce in Hadoop 2 YARN"
;;
*)
echo "ERROR: Unknown Hadoop version"
@@ -22,6 +22,17 @@ case "$HADOOP_MAJOR_VERSION" in
rm hadoop-*.tar.gz
mv hadoop-2.0.0-cdh4.2.0/ persistent-hdfs/
# Have single conf dir
rm -rf /root/persistent-hdfs/etc/hadoop/
ln -s /root/persistent-hdfs/conf /root/persistent-hdfs/etc/hadoop
;;
yarn)
wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.4.0.tar.gz
echo "Unpacking Hadoop"
tar xvzf hadoop-*.tar.gz > /tmp/spark-ec2_hadoop.log
rm hadoop-*.tar.gz
mv hadoop-2.4.0/ persistent-hdfs/
# Have single conf dir
rm -rf /root/persistent-hdfs/etc/hadoop/
ln -s /root/persistent-hdfs/conf /root/persistent-hdfs/etc/hadoop
@@ -91,8 +91,10 @@ else
1.1.0)
if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then
wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-hadoop1.tgz
else
elif [[ "$HADOOP_MAJOR_VERSION" == "2" ]]; then
wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-cdh4.tgz
else
wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-hadoop2.4.tgz
fi
;;
1.1.1)
@@ -0,0 +1,111 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.scheduler.capacity.maximum-applications</name>
<value>10000</value>
<description>
Maximum number of applications that can be pending and running.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.1</value>
<description>
Maximum percent of resources in the cluster which can be used to run
application masters, i.e. it controls the number of concurrently running
applications.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
<description>
The ResourceCalculator implementation to be used to compare
Resources in the scheduler.
The default, DefaultResourceCalculator, only uses memory, while
DominantResourceCalculator uses the dominant resource to compare
multi-dimensional resources such as memory, CPU, etc.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>default</value>
<description>
The queues at this level (root is the root queue).
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>100</value>
<description>Default queue target capacity.</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
<value>1</value>
<description>
Default queue user limit, a percentage from 0.0 to 1.0.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
<value>100</value>
<description>
The maximum capacity of the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.state</name>
<value>RUNNING</value>
<description>
The state of the default queue. State can be one of RUNNING or STOPPED.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
<value>*</value>
<description>
The ACL of who can submit jobs to the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
<value>*</value>
<description>
The ACL of who can administer jobs on the default queue.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.node-locality-delay</name>
<value>40</value>
<description>
Number of missed scheduling opportunities after which the CapacityScheduler
attempts to schedule rack-local containers.
Typically this should be set to the number of nodes in the cluster. By default
it is set to 40, approximately the number of nodes in one rack.
</description>
</property>
</configuration>
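This capacity-scheduler.xml defines a single "default" queue with 100% capacity, which is where Spark-on-YARN applications will land. A sketch of submitting the SparkPi example against it; the Spark install path and examples jar location are assumptions based on a typical spark-ec2 layout, not something taken from this commit:

# Illustrative spark-submit against the "default" queue configured above.
/root/spark/bin/spark-submit \
  --master yarn-cluster \
  --queue default \
  --num-executors 2 \
  --executor-memory 1g \
  --class org.apache.spark.examples.SparkPi \
  /root/spark/lib/spark-examples-*.jar 10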
@@ -15,6 +15,11 @@
<value>hdfs://{{active_master}}:9000</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://{{active_master}}:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>65536</value>
@@ -55,4 +60,9 @@
<value>{{aws_secret_access_key}}</value>
</property>
<property>
<name>hadoop.security.group.mapping</name>
<value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
</property>
</configuration>
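fs.default.name is the Hadoop 1 key and is deprecated in Hadoop 2 in favor of fs.defaultFS, so the template now sets both to the same NameNode URI. A quick illustrative smoke test that unqualified paths resolve against it (the binary path is assumed from this repository's layout):

# Illustrative only: with fs.defaultFS set, bare paths resolve to hdfs://{{active_master}}:9000.
/root/ephemeral-hdfs/bin/hadoop fs -mkdir -p /tmp/defaultfs-smoke
/root/ephemeral-hdfs/bin/hadoop fs -ls /tmp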
@@ -5,6 +5,11 @@
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>{{active_master}}:9001</value>
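Setting mapreduce.framework.name to yarn makes classic MapReduce jobs run as YARN applications; the mapred.job.tracker entry below it only matters for Hadoop 1 style deployments. A hedged way to verify, with the examples jar path assumed from a stock hadoop-2.4.0 tarball rather than taken from this commit:

# Illustrative verification that MapReduce now submits through YARN.
/root/ephemeral-hdfs/bin/hadoop jar \
  /root/ephemeral-hdfs/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.4.0.jar pi 2 10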
@@ -0,0 +1,126 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
# resolve links - $0 may be a softlink
#export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
export YARN_CONF_DIR="/root/ephemeral-hdfs/conf"
# some Java parameters
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
#if [ "$JAVA_HOME" != "" ]; then
# #echo "run java in $JAVA_HOME"
# JAVA_HOME=$JAVA_HOME
#fi
#
#if [ "$JAVA_HOME" = "" ]; then
# echo "Error: JAVA_HOME is not set."
# exit 1
#fi
export JAVA_HOME={{java_home}}
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# For setting YARN specific HEAP sizes please use this
# Parameter and set appropriately
export YARN_HEAPSIZE=1000
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi
# Resource Manager specific parameters
# Specify the max heap size for the ResourceManager using a numerical value
# in MB. For example, to specify a JVM option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_RESOURCEMANAGER_HEAPSIZE=1000
# Specify the max heap size for the timeline server using a numerical value
# in MB. For example, to specify a JVM option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_RESOURCEMANAGER_OPTS=
# Node Manager specific parameters
# Specify the max heap size for the NodeManager using a numerical value
# in MB. For example, to specify a JVM option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_NODEMANAGER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_NODEMANAGER_OPTS=
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# default log directory & file
#if [ "$YARN_LOG_DIR" = "" ]; then
# YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
#fi
export YARN_LOG_DIR=/mnt/ephemeral-hdfs/logs
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
# default policy file for service-level authorization
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"