diff --git a/copy-dir b/copy-dir new file mode 120000 index 00000000..c333ef87 --- /dev/null +++ b/copy-dir @@ -0,0 +1 @@ +copy-dir.sh \ No newline at end of file diff --git a/copy-dir.sh b/copy-dir.sh new file mode 100755 index 00000000..ba178f25 --- /dev/null +++ b/copy-dir.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +if [[ "$#" != "1" ]] ; then + echo "Usage: copy-dir " + exit 1 +fi + +DIR=`readlink -f "$1"` +DIR=`echo "$DIR"|sed 's@/$@@'` +DEST=`dirname "$DIR"` + +SLAVES=`cat /root/spark-ec2/slaves` + +SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" + +echo "RSYNC'ing $DIR to slaves..." +for slave in $SLAVES; do + echo $slave + rsync -e "ssh $SSH_OPTS" -az "$DIR" "$slave:$DEST" & sleep 0.5 +done +wait diff --git a/create-swap.sh b/create-swap.sh new file mode 100755 index 00000000..9ab32f84 --- /dev/null +++ b/create-swap.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +if [ $# -lt 1 ]; then + echo "Usage: create-swap " + exit 1 +fi + +if [ -e /mnt/swap ]; then + echo "/mnt/swap already exists" >&2 + exit 1 +fi + +SWAP_MB=$1 +if [[ "$SWAP_MB" != "0" ]]; then + dd if=/dev/zero of=/mnt/swap bs=1M count=$SWAP_MB + mkswap /mnt/swap + swapon /mnt/swap + echo "Added $SWAP_MB MB swap file /mnt/swap" +fi diff --git a/deploy_templates.py b/deploy_templates.py new file mode 100755 index 00000000..8ead0351 --- /dev/null +++ b/deploy_templates.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +import os +import sys + +# Deploy the configuration file templates in the spark-ec2/templates directory +# to the root filesystem, substituting variables such as the master hostname, +# ZooKeeper URL, etc as read from the environment. 
+ +# Find system memory in KB and compute Spark's default limit from that +system_ram_kb = int( + os.popen("cat /proc/meminfo | grep MemTotal | awk '{print $2}'") + .read().strip()) +system_ram_mb = system_ram_kb / 1024 +if system_ram_mb > 20*1024: + # Leave 3 GB for the OS, HDFS and buffer cache + spark_mb = system_ram_mb - 3 * 1024 +elif system_ram_mb > 10*1024: + # Leave 2 GB for the OS & co. + spark_mb = system_ram_mb - 2 * 1024 +else: + # Leave 1.3 GB for the OS & co. Note that this must be more than + # 1 GB because Mesos leaves 1 GB free and requires 32 MB/task. + spark_mb = max(512, system_ram_mb - 1300) + +template_vars = { + "master_list": os.getenv("MASTERS"), + "active_master": os.getenv("MASTERS").split("\n")[0], + "slave_list": os.getenv("SLAVES"), + "zoo_list": os.getenv("MESOS_ZOO_LIST"), + "cluster_url": os.getenv("MESOS_CLUSTER_URL"), + "hdfs_data_dirs": os.getenv("HDFS_DATA_DIRS"), + "mapred_local_dirs": os.getenv("MAPRED_LOCAL_DIRS"), + "spark_local_dirs": os.getenv("SPARK_LOCAL_DIRS"), + "default_spark_mem": "%dm" % spark_mb +} + +template_dir="/root/spark-ec2/templates" + +for path, dirs, files in os.walk(template_dir): + if path.find(".svn") == -1: + dest_dir = os.path.join('/', path[len(template_dir):]) + if not os.path.exists(dest_dir): + os.makedirs(dest_dir) + for filename in files: + if filename[0] not in '#.~' and filename[-1] != '~': + dest_file = os.path.join(dest_dir, filename) + with open(os.path.join(path, filename)) as src: + with open(dest_file, "w") as dest: + print "Configuring " + dest_file + text = src.read() + for key in template_vars: + text = text.replace("{{" + key + "}}", template_vars[key]) + dest.write(text) + dest.close() diff --git a/ec2-variables.sh b/ec2-variables.sh new file mode 100755 index 00000000..b35d223b --- /dev/null +++ b/ec2-variables.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# These variables should be set before running setup.sh. 
+export MASTERS="ec2-107-22-79-196.compute-1.amazonaws.com" +export SLAVES="ec2-107-22-111-247.compute-1.amazonaws.com" +export HDFS_DATA_DIRS="/mnt/ephemeral-hdfs/data,/mnt2/ephemeral-hdfs/data" +export MAPRED_LOCAL_DIRS="/mnt/hadoop/mrlocal,/mnt2/hadoop/mrlocal" + +export MESOS_ZOO_LIST="NONE" +export SWAP_MB=1024 + +# Supported modules +# spark +# ephemeral-hdfs +# persistent-hdfs +# mesos +export MODULES="spark ephemeral-hdfs mesos" + +# Other variables used in scripts +# export SPARK_LOCAL_DIRS +# export MESOS_DOWNLOAD_METHOD diff --git a/ephemeral-hdfs/setup-slave.sh b/ephemeral-hdfs/setup-slave.sh new file mode 100755 index 00000000..7afc2d71 --- /dev/null +++ b/ephemeral-hdfs/setup-slave.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Setup ephemeral-hdfs +mkdir -p /mnt/ephemeral-hdfs/logs +mkdir -p /mnt/hadoop-logs + +# Create Hadoop and HDFS directories in a given parent directory +# (for example /mnt, /mnt2, and so on) +function create_hadoop_dirs { + location=$1 + if [[ -e $location ]]; then + mkdir -p $location/ephemeral-hdfs $location/hadoop/tmp + mkdir -p $location/hadoop/mrlocal $location/hadoop/mrlocal2 + fi +} + +# Set up Hadoop and Mesos directories in /mnt +create_hadoop_dirs /mnt +create_hadoop_dirs /mnt2 +create_hadoop_dirs /mnt3 +create_hadoop_dirs /mnt4 diff --git a/ephemeral-hdfs/setup.sh b/ephemeral-hdfs/setup.sh new file mode 100755 index 00000000..10d800e3 --- /dev/null +++ b/ephemeral-hdfs/setup.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +EPHEMERAL_HDFS=/root/ephemeral-hdfs +pushd $EPHEMERAL_HDFS + +source ./setup-slave.sh + +for node in $SLAVES $OTHER_MASTERS; do + echo $node + ssh -t $SSH_OPTS root@$node "/root/spark-ec2/ephemeral-hdfs/setup-slave.sh" & sleep 0.3 +done + +/root/spark-ec2/copy-dir $EPHEMERAL_HDFS/conf + +echo "Formatting ephemeral HDFS namenode..." +$EPHEMERAL_HDFS/bin/hadoop namenode -format + +echo "Starting ephemeral HDFS..." 
+$EPHEMERAL_HDFS/bin/start-dfs.sh + +popd diff --git a/mesos/compute_cluster_url.py b/mesos/compute_cluster_url.py new file mode 100755 index 00000000..bb3609e0 --- /dev/null +++ b/mesos/compute_cluster_url.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import sys + +# Get the Mesos cluster URL, assuming the EC2 script environment variables +# are all available. + +active_master = os.getenv("MASTERS").split("\n")[0] +zoo_list = os.getenv("MESOS_ZOO_LIST") + +if zoo_list.strip() == "NONE": + print active_master + ":5050" +else: + zoo_nodes = zoo_list.strip().split("\n") + print "zoo://" + ",".join(["%s:2181/mesos" % node for node in zoo_nodes]) diff --git a/mesos/redeploy-mesos b/mesos/redeploy-mesos new file mode 100755 index 00000000..c99ead35 --- /dev/null +++ b/mesos/redeploy-mesos @@ -0,0 +1,24 @@ +#!/bin/bash +cd /root/spark-ec2 + +MASTERS=`cat masters` +NUM_MASTERS=`cat masters | wc -l` +SLAVES=`cat slaves` + +SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" + +if [[ $NUM_MASTERS -gt 1 ]]; then + echo "RSYNC'ing /root/mesos to masters..." + for master in $MASTERS; do + echo $master + rsync -e "ssh $SSH_OPTS" -az --exclude '*.d' --exclude '*.o' --exclude '*.cpp' --exclude '*.hpp' --exclude '*.pyc' --exclude 'mesos/frameworks/hadoop-0.20.0/logs/*' --exclude 'mesos/work' --exclude 'mesos/logs' --exclude 'mesos/test_output' /root/mesos $master:/root & sleep 0.3 + done + wait +fi + +echo "RSYNC'ing /root/mesos to slaves..." 
+for slave in $SLAVES; do + echo $slave + rsync -e "ssh $SSH_OPTS" -az --exclude '*.d' --exclude '*.o' --exclude '*.cpp' --exclude '*.hpp' --exclude '*.pyc' --exclude 'mesos/frameworks/hadoop-0.20.0/logs/*' --exclude 'mesos/work' --exclude 'mesos/logs' --exclude 'mesos/test_output' /root/mesos $slave:/root & sleep 0.3 +done +wait diff --git a/mesos/setup.sh b/mesos/setup.sh new file mode 100755 index 00000000..61ef3566 --- /dev/null +++ b/mesos/setup.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +echo "$MESOS_ZOO_LIST" > zoo +ZOOS=`cat zoo` + +if [[ $ZOOS = *NONE* ]]; then + NUM_ZOOS=0 + ZOOS="" +else + NUM_ZOOS=`cat zoo | wc -l` +fi + +if [[ $NUM_ZOOS != 0 ]] ; then + echo "SSH'ing to ZooKeeper server(s) to approve keys..." + zid=1 + for zoo in $ZOOS; do + echo $zoo + ssh $SSH_OPTS $zoo echo -n \; mkdir -p /tmp/zookeeper \; echo $zid \> /tmp/zookeeper/myid & + zid=$(($zid+1)) + sleep 0.3 + done +fi + +mkdir -p /mnt/mesos-logs +mkdir -p /mnt/mesos-work + +for node in $SLAVES $OTHER_MASTERS; do + ssh -t $SSH_OPTS root@$node "mkdir -p /mnt/mesos-logs /mnt/mesos-work" & sleep 0.3 +done + +DOWNLOADED=0 + +if [[ "$MESOS_DOWNLOAD_METHOD" == "git" ]] ; then + # change git's ssh command so it does not ask to accept a keys + export GIT_SSH=/root/spark-ec2/ssh-no-keychecking.sh + REPOSITORY=git://github.com/apache/mesos.git + echo "Checking out Mesos from $REPOSITORY" + pushd /root > /dev/null 2>&1 + rm -rf mesos mesos.tgz + # Set git SSH command to a script that uses -o StrictHostKeyChecking=no + git clone $REPOSITORY mesos + pushd mesos 2>&1 + git checkout -b $BRANCH --track origin/$BRANCH + popd > /dev/null 2>&1 + popd > /dev/null 2>&1 + DOWNLOADED=1 +fi + +# Build Mesos if we downloaded it +if [[ "$DOWNLOADED" == "1" ]] ; then + echo "Building Mesos..." + mkdir /root/mesos/build + pushd /root/mesos/build > /dev/null 2>&1 + ./configure.amazon-linux-64 + make clean + make + popd > /dev/null 2>&1 + if [ -d /root/spark ] ; then + echo "Building Spark..." 
+ pushd /root/spark > /dev/null 2>&1 + git pull + sbt/sbt clean compile + popd > /dev/null 2>&1 + fi + echo "Building Hadoop framework..." + pushd /root/mesos/build > /dev/null 2>&1 + make hadoop + rm -fr /root/hadoop-mesos + mv /root/mesos/build/hadoop/hadoop-0.20.205.0 /root/hadoop-mesos + popd > /dev/null 2>&1 +fi + +echo "Setting up Hadoop framework config files..." +cp hadoop-framework-conf/* /root/hadoop-mesos/conf + +echo "Deploying Hadoop framework config files..." +/root/spark-ec2/copy-dir /root/hadoop-mesos/conf + +echo "Redeploying /root/mesos..." +./redeploy-mesos + +if [[ $NUM_ZOOS != 0 ]]; then + echo "Starting ZooKeeper quorum..." + for zoo in $ZOOS; do + ssh $SSH_OPTS $zoo "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start /dev/null" & sleep 0.1 + done + wait + sleep 5 +fi + +echo "Stopping any existing Mesos cluster..." +./stop-mesos +sleep 2 + +echo "Starting Mesos cluster..." +./start-mesos diff --git a/mesos/start-mesos b/mesos/start-mesos new file mode 100755 index 00000000..b0b84cef --- /dev/null +++ b/mesos/start-mesos @@ -0,0 +1,54 @@ +#!/bin/bash +cd /root/spark-ec2 + +MASTERS=`cat masters` +ACTIVE_MASTER=`cat masters | head -1` +SLAVES=`cat slaves` +ZOOS=`cat zoo` + + +if [[ $ZOOS = *NONE* ]]; then + NUM_ZOOS=0 +else + NUM_ZOOS=`cat zoo | wc -l` +fi + +SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" + +cluster_url=`cat cluster-url` + +echo "Running with cluster URL: "$cluster_url + +if [[ $NUM_ZOOS != 0 ]]; then + masterid=1 + for master in $MASTERS; do + echo "Starting master $masterid on $master" + ssh $SSH_OPTS $master "/root/spark-ec2/mesos/mesos-daemon mesos-master -p 5050 -u $cluster_url $@ /dev/null" & sleep 0.3 + masterid=$(($masterid+1)) + done + wait +else + echo "Starting master on $ACTIVE_MASTER" + ssh $SSH_OPTS $ACTIVE_MASTER "/root/spark-ec2/mesos/mesos-daemon mesos-master --failover_timeout=1 -p 5050 $@ /dev/null" +fi + +sleep 5 + +for slave in $SLAVES; do + echo "Starting slave on $slave" + ssh $SSH_OPTS 
$slave "/root/spark-ec2/mesos/mesos-daemon mesos-slave -m ${cluster_url} /dev/null" & + sleep 0.3 +done +wait + +if [[ $NUM_ZOOS != 0 ]]; then + echo "ZooKeeper is running at" + for zoo in $ZOOS; do + echo " $zoo:2181" + done +fi + +echo "Everything's started! You can view the master Web UI at" +for master in $MASTERS; do + echo " http://$master:8080" +done diff --git a/mesos/stop-mesos b/mesos/stop-mesos new file mode 100755 index 00000000..9fdb8753 --- /dev/null +++ b/mesos/stop-mesos @@ -0,0 +1,21 @@ +#!/bin/bash +cd /root/spark-ec2 + +MASTERS=`cat masters` +SLAVES=`cat slaves` + +SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" + +for slave in $SLAVES; do + echo "Stopping slave on $slave" + ssh $SSH_OPTS $slave pkill mesos-slave & + sleep 0.1 +done +wait + +for master in $MASTERS; do + echo "Stopping master on $master" + ssh $SSH_OPTS $master pkill mesos-master & + sleep 0.1 +done +wait diff --git a/persistent-hdfs/setup.sh b/persistent-hdfs/setup.sh new file mode 100755 index 00000000..05abd05a --- /dev/null +++ b/persistent-hdfs/setup.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +PERSISTENT_HDFS=/root/persistent-hdfs + +mkdir -p /mnt/persistent-hdfs/logs +for node in $SLAVES $OTHER_MASTERS; do + ssh -t $SSH_OPTS root@$node "mkdir -p /mnt/persistent-hdfs/logs" & sleep 0.3 +done + +if [[ ! -e /vol/persistent-hdfs/dfs/name ]] ; then + echo "Formatting persistent HDFS namenode..." + $PERSISTENT_HDFS/bin/hadoop namenode -format +fi + +echo "Starting persistent HDFS..." 
+$PERSISTENT_HDFS/bin/start-dfs.sh diff --git a/setup-slave.sh b/setup-slave.sh new file mode 100755 index 00000000..8e8c4308 --- /dev/null +++ b/setup-slave.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Make sure we are in the spark-ec2 directory +cd /root/spark-ec2 + +source ec2-variables.sh + +# Set hostname based on EC2 private DNS name, so that it is set correctly +# even if the instance is restarted with a different private DNS name +PRIVATE_DNS=`wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname` +hostname $PRIVATE_DNS +echo $PRIVATE_DNS > /etc/hostname +HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too + +echo "Setting up slave on `hostname`..." + +# Mount options to use for ext3 and xfs disks (the ephemeral disks +# are ext3, but we use xfs for EBS volumes to format them faster) +EXT3_MOUNT_OPTS="defaults,noatime,nodiratime" + +# Mount any ephemeral volumes we might have beyond /mnt +function setup_extra_volume { + device=$1 + mount_point=$2 + if [[ -e $device && ! -e $mount_point ]]; then + mkdir -p $mount_point + mount -o $EXT3_MOUNT_OPTS $device $mount_point + echo "$device $mount_point auto $EXT3_MOUNT_OPTS 0 0" >> /etc/fstab + fi +} +setup_extra_volume /dev/xvdc /mnt2 +setup_extra_volume /dev/xvdd /mnt3 +setup_extra_volume /dev/xvde /mnt4 + +# Mount cgroup file system +if [[ ! -e /cgroup ]]; then + mkdir -p /cgroup + mount -t cgroup none /cgroup + echo "none /cgroup cgroup defaults 0 0" >> /etc/fstab +fi + +# Format and mount EBS volume (/dev/sdv) as /vol if the device exists +# and we have not already created /vol +if [[ -e /dev/sdv && ! -e /vol ]]; then + mkdir /vol + if mkfs.xfs -q /dev/sdv; then + mount -o $XFS_MOUNT_OPTS /dev/sdv /vol + echo "/dev/sdv /vol xfs $XFS_MOUNT_OPTS 0 0" >> /etc/fstab + chmod -R a+w /vol + else + # mkfs.xfs is not installed on this machine or has failed; + # delete /vol so that the user doesn't think we successfully + # mounted the EBS volume + rmdir /vol + fi +elif [[ ! 
-e /vol ]]; then + # Not using EBS, but let's mkdir /vol so that we can chmod it + mkdir /vol + chmod -R a+w /vol +fi + +# Make data dirs writable by non-root users, such as CDH's hadoop user +chmod -R a+w /mnt* + +# Remove ~/.ssh/known_hosts because it gets polluted as you start/stop many +# clusters (new machines tend to come up under old hostnames) +rm -f /root/.ssh/known_hosts + +# Create swap space on /mnt +/root/spark-ec2/create-swap.sh $SWAP_MB diff --git a/setup.sh b/setup.sh new file mode 100755 index 00000000..3eddb7cc --- /dev/null +++ b/setup.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +# Make sure we are in the spark-ec2 directory +cd /root/spark-ec2 + +# Load the cluster variables set by the deploy script +source ec2-variables.sh + +# Set hostname based on EC2 private DNS name, so that it is set correctly +# even if the instance is restarted with a different private DNS name +PRIVATE_DNS=`wget -q -O - http://instance-data.ec2.internal/latest/meta-data/local-hostname` +hostname $PRIVATE_DNS +echo $PRIVATE_DNS > /etc/hostname +export HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too + +echo "Setting up Spark on `hostname`..." + +# Set up the masters, slaves, etc files based on cluster env variables +echo "$MASTERS" > masters +echo "$SLAVES" > slaves + +MASTERS=`cat masters` +NUM_MASTERS=`cat masters | wc -l` +OTHER_MASTERS=`cat masters | sed '1d'` +SLAVES=`cat slaves` + +JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk.x86_64 +SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" + +if [[ `tty` == "not a tty" ]] ; then + echo "Expecting a tty or pty! (use the ssh -t option)." + exit 1 +fi + +echo "Setting executable permissions on scripts..." +find . -regex "^.+.\(sh\|py\)" | xargs chmod a+x + +echo "SSH'ing to master machine(s) to approve key(s)..." 
+for master in $MASTERS; do + echo $master + ssh $SSH_OPTS $master echo -n & + sleep 0.3 +done +ssh $SSH_OPTS localhost echo -n & +ssh $SSH_OPTS `hostname` echo -n & +wait + +# Try to SSH to each cluster node to approve their key. Since some nodes may +# be slow in starting, we retry failed slaves up to 3 times. +TODO="$SLAVES $OTHER_MASTERS" # List of nodes to try (initially all) +TRIES="0" # Number of times we've tried so far +echo "SSH'ing to other cluster nodes to approve keys..." +while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ] ; do + NEW_TODO= + for slave in $TODO; do + echo $slave + ssh $SSH_OPTS $slave echo -n + if [ $? != 0 ] ; then + NEW_TODO="$NEW_TODO $slave" + fi + done + TRIES=$[$TRIES + 1] + if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ] ; then + sleep 15 + TODO="$NEW_TODO" + echo "Re-attempting SSH to cluster nodes to approve keys..." + else + break; + fi +done + +echo "RSYNC'ing /root/spark-ec2 to other cluster nodes..." +for node in $SLAVES $OTHER_MASTERS; do + echo $node + rsync -e "ssh $SSH_OPTS" -az /root/spark-ec2 $node:/root & + scp $SSH_OPTS ~/.ssh/id_rsa $node:.ssh & + sleep 0.3 +done +wait + +echo "Running setup-slave on master to mount filesystems, etc..." +source ./setup-slave.sh + +echo "Running slave setup script on other cluster nodes..." +for node in $SLAVES $OTHER_MASTERS; do + echo $node + ssh -t $SSH_OPTS root@$node "spark-ec2/setup-slave.sh $SWAP_MB" & sleep 0.3 +done +wait + +# Set environment variables required by templates +# TODO: Make this general by using a init.sh per module ? +./mesos/compute_cluster_url.py > ./cluster-url +export MESOS_CLUSTER_URL=`cat ./cluster-url` + +# Deploy templates +# TODO: Move configuring templates to a per-module ? +echo "Creating local config files..." 
+./deploy_templates.py + +# Setup each module +for module in $MODULES; do + echo "Setting up $module" + source ./$module/setup.sh + sleep 1 +done diff --git a/spark/setup.sh b/spark/setup.sh new file mode 100755 index 00000000..566adc42 --- /dev/null +++ b/spark/setup.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +echo "Setting up Spark config files..." +# TODO: This currently overwrites whatever the user wrote there; on +# the other hand, we also don't want to leave an old file created by +# us because it would have the wrong hostname for HDFS etc +mkdir -p /root/spark/conf +chmod u+x /root/spark/conf/spark-env.sh + +echo "Deploying Spark config files..." +/root/spark-ec2/copy-dir /root/spark/conf + +# Add stuff for standalone mode here, using an environment variable diff --git a/ssh-no-keychecking.sh b/ssh-no-keychecking.sh new file mode 100755 index 00000000..3daf46fe --- /dev/null +++ b/ssh-no-keychecking.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Utility script that exec's SSH without key checking so that we can check +# out code from GitHub without prompting the user. + +exec ssh -o StrictHostKeyChecking=no $@ diff --git a/templates/root/ephemeral-hdfs/conf/core-site.xml b/templates/root/ephemeral-hdfs/conf/core-site.xml new file mode 100644 index 00000000..565f54dd --- /dev/null +++ b/templates/root/ephemeral-hdfs/conf/core-site.xml @@ -0,0 +1,23 @@ + + + + + + + + + hadoop.tmp.dir + /mnt/ephemeral-hdfs + + + + fs.default.name + hdfs://{{active_master}}:9000 + + + + io.file.buffer.size + 65536 + + + diff --git a/templates/root/ephemeral-hdfs/conf/hadoop-env.sh b/templates/root/ephemeral-hdfs/conf/hadoop-env.sh new file mode 100755 index 00000000..4e1e6991 --- /dev/null +++ b/templates/root/ephemeral-hdfs/conf/hadoop-env.sh @@ -0,0 +1,66 @@ +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. 
When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. Required. +export JAVA_HOME=/usr/lib/jvm/java-1.6.0 + +# Extra Java CLASSPATH elements. Optional. +# export HADOOP_CLASSPATH= + +# The maximum amount of heap to use, in MB. Default is 1000. +export HADOOP_HEAPSIZE=1000 + +# Extra Java runtime options. Empty by default. +# export HADOOP_OPTS=-server +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true" + +# Command specific options appended to HADOOP_OPTS when specified +export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS" +export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS" +export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS" +export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS" +export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS" +# export HADOOP_TASKTRACKER_OPTS= +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +# export HADOOP_CLIENT_OPTS + +# Extra ssh options. Empty by default. +# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR" +export HADOOP_SSH_OPTS="-o ConnectTimeout=5" + +# Where log files are stored. $HADOOP_HOME/logs by default. +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs +export HADOOP_LOG_DIR=/mnt/ephemeral-hdfs/logs + +# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. +# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves + +# host:path where hadoop code should be rsync'd from. Unset by default. +# export HADOOP_MASTER=master:/home/$USER/src/hadoop + +# Seconds to sleep between slave commands. Unset by default. This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. 
+# export HADOOP_SLAVE_SLEEP=0.1 + +# The directory where pid files are stored. /tmp by default. +export HADOOP_PID_DIR=/var/hadoop/ephemeral-hdfs/pids + +# A string representing this instance of hadoop. $USER by default. +# export HADOOP_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. +# export HADOOP_NICENESS=10 + +# Set hadoop user for CDH (which doesn't allow running as root) +export HADOOP_NAMENODE_USER=hadoop +export HADOOP_DATANODE_USER=hadoop +export HADOOP_SECONDARYNAMENODE_USER=hadoop +export HADOOP_JOBTRACKER_USER=hadoop +export HADOOP_TASKTRACKER_USER=hadoop + +ulimit -n 16000 diff --git a/templates/root/ephemeral-hdfs/conf/hdfs-site.xml b/templates/root/ephemeral-hdfs/conf/hdfs-site.xml new file mode 100644 index 00000000..43e68aa3 --- /dev/null +++ b/templates/root/ephemeral-hdfs/conf/hdfs-site.xml @@ -0,0 +1,36 @@ + + + + + + + dfs.replication + 3 + + + + dfs.block.size + 134217728 + + + + dfs.data.dir + {{hdfs_data_dirs}} + + + + dfs.namenode.handler.count + 25 + + + + dfs.datanode.handler.count + 8 + + + + dfs.permissions + false + + + diff --git a/templates/root/ephemeral-hdfs/conf/mapred-site.xml b/templates/root/ephemeral-hdfs/conf/mapred-site.xml new file mode 100644 index 00000000..b1637dc8 --- /dev/null +++ b/templates/root/ephemeral-hdfs/conf/mapred-site.xml @@ -0,0 +1,29 @@ + + + + + + + + + mapred.job.tracker + {{active_master}}:9001 + + + + mapred.tasktracker.map.tasks.maximum + 4 + The maximum number of map tasks that will be run + simultaneously by a task tracker. + + + + + mapred.tasktracker.reduce.tasks.maximum + 2 + The maximum number of reduce tasks that will be run + simultaneously by a task tracker. 
+ + + + diff --git a/templates/root/ephemeral-hdfs/conf/masters b/templates/root/ephemeral-hdfs/conf/masters new file mode 100644 index 00000000..d26a1943 --- /dev/null +++ b/templates/root/ephemeral-hdfs/conf/masters @@ -0,0 +1 @@ +{{active_master}} diff --git a/templates/root/ephemeral-hdfs/conf/slaves b/templates/root/ephemeral-hdfs/conf/slaves new file mode 100644 index 00000000..05f969e0 --- /dev/null +++ b/templates/root/ephemeral-hdfs/conf/slaves @@ -0,0 +1 @@ +{{slave_list}} diff --git a/templates/root/mesos-ec2/hadoop-framework-conf/core-site.xml b/templates/root/mesos-ec2/hadoop-framework-conf/core-site.xml new file mode 100644 index 00000000..818ed103 --- /dev/null +++ b/templates/root/mesos-ec2/hadoop-framework-conf/core-site.xml @@ -0,0 +1,23 @@ + + + + + + + + + hadoop.tmp.dir + /mnt/hadoop-framework + + + + fs.default.name + hdfs://{{active_master}}:9000 + + + + io.file.buffer.size + 65536 + + + diff --git a/templates/root/mesos-ec2/hadoop-framework-conf/hadoop-env.sh b/templates/root/mesos-ec2/hadoop-framework-conf/hadoop-env.sh new file mode 100755 index 00000000..6db81547 --- /dev/null +++ b/templates/root/mesos-ec2/hadoop-framework-conf/hadoop-env.sh @@ -0,0 +1,72 @@ +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. Required. +export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk.x86_64 + +# Mesos build directory, useful for finding JARs and the native library. +export MESOS_BUILD_DIR=/root/mesos/build + +# Google protobuf (necessary for running the MesosScheduler). +export PROTOBUF_JAR=${MESOS_BUILD_DIR}/protobuf-2.3.0.jar + +# Mesos. 
+MESOS_VERSION=`echo @PACKAGE_VERSION@ | ${MESOS_BUILD_DIR}/config.status --file=-:-` +export MESOS_JAR=${MESOS_BUILD_DIR}/src/mesos-${MESOS_VERSION}.jar + +# Native Mesos library. +export MESOS_NATIVE_LIBRARY=${MESOS_BUILD_DIR}/src/.libs/libmesos.so + +# Extra Java CLASSPATH elements. Optional. +export HADOOP_CLASSPATH=${HADOOP_HOME}/build/contrib/mesos/classes:${MESOS_JAR}:${PROTOBUF_JAR} + +# The maximum amount of heap to use, in MB. Default is 1000. +export HADOOP_HEAPSIZE=1000 + +# Extra Java runtime options. Empty by default. +# export HADOOP_OPTS=-server +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true" + +# Command specific options appended to HADOOP_OPTS when specified +export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS" +export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS" +export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS" +export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS" +export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS" +# export HADOOP_TASKTRACKER_OPTS= +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +# export HADOOP_CLIENT_OPTS + +# Extra ssh options. Empty by default. +# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR" +export HADOOP_SSH_OPTS="-o ConnectTimeout=5" + +# Where log files are stored. $HADOOP_HOME/logs by default. +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs +export HADOOP_LOG_DIR=/mnt/hadoop-logs + +# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. +# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves + +# host:path where hadoop code should be rsync'd from. Unset by default. +# export HADOOP_MASTER=master:/home/$USER/src/hadoop + +# Seconds to sleep between slave commands. Unset by default. 
This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. +# export HADOOP_SLAVE_SLEEP=0.1 + +# The directory where pid files are stored. /tmp by default. +# export HADOOP_PID_DIR=/var/hadoop/pids + +# A string representing this instance of hadoop. $USER by default. +# export HADOOP_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. +# export HADOOP_NICENESS=10 + +ulimit -n 10000 diff --git a/templates/root/mesos-ec2/hadoop-framework-conf/mapred-site.xml b/templates/root/mesos-ec2/hadoop-framework-conf/mapred-site.xml new file mode 100644 index 00000000..0ffa92f1 --- /dev/null +++ b/templates/root/mesos-ec2/hadoop-framework-conf/mapred-site.xml @@ -0,0 +1,83 @@ + + + + + + + + + mapred.job.tracker + {{active_master}}:9001 + + + + mapred.local.dir + {{mapred_local_dirs}} + + + + mapred.jobtracker.taskScheduler + org.apache.hadoop.mapred.MesosScheduler + + + + mapred.mesos.master + {{cluster_url}} + + + + io.file.buffer.size + 65536 + + + + mapred.job.tracker.handler.count + 20 + + + + tasktracker.http.threads + 40 + + + + mapred.child.java.opts + -Xmx500m + + + + mapred.mesos.task.mem + 500 + + + + mapred.job.reuse.jvm.num.tasks + -1 + + + + io.sort.factor + 15 + + + + io.sort.mb + 150 + + + + mapred.mesos.localitywait + 5000 + + + + mapred.tasktracker.map.tasks.maximum + 8 + + + + mapred.tasktracker.reduce.tasks.maximum + 8 + + + diff --git a/templates/root/mesos-ec2/haproxy+apache/haproxy.config.template b/templates/root/mesos-ec2/haproxy+apache/haproxy.config.template new file mode 100644 index 00000000..957c3f6a --- /dev/null +++ b/templates/root/mesos-ec2/haproxy+apache/haproxy.config.template @@ -0,0 +1,8 @@ +listen webfarm {{active_master}}:80 + timeout server 7500 + timeout client 7500 + timeout connect 7500 + mode http + balance roundrobin + option httpchk HEAD /index.html HTTP/1.0 + stats uri /stats diff --git 
a/templates/root/mesos-ec2/hypertable/Capfile b/templates/root/mesos-ec2/hypertable/Capfile new file mode 100644 index 00000000..c53c0976 --- /dev/null +++ b/templates/root/mesos-ec2/hypertable/Capfile @@ -0,0 +1,463 @@ +set :source_machine, "{{active_master}}" +set :install_dir, "/opt/hypertable" +set :hypertable_version, "0.9.5.0.pre3" +set :default_pkg, "/tmp/hypertable-0.9.5.0.pre3-linux-x86_64.deb" +set :default_dfs, "hadoop" +set :default_config, "/root/spark-ec2/hypertable/hypertable.cfg" +set :default_additional_args, "" +set :hbase_home, "/opt/hbase/current" +set :default_client_multiplier, 1 +set :default_test_driver, "hypertable" +set :default_test_args, "" + +role :source, "{{active_master}}" +role :master, "{{active_master}}" +role :hyperspace, "{{active_master}}" +open("/root/spark-ec2/slaves").each do |slave| + role :slave, slave +end +role :localhost, "{{active_master}}" +role :thriftbroker +role :spare +role :test_client +role :test_dispatcher + +######################### END OF USER CONFIGURATION ############################ + +def supported_pkgs + {"rpm"=>1, "deb"=>1} +end + +def pkg_regex + '.*\.(deb|rpm)$' +end + +set(:pkg) do + "#{default_pkg}" +end unless exists?(:pkg) + +set(:dfs) do + "#{default_dfs}" +end unless exists?(:dfs) + +set(:config) do + "#{default_config}" +end unless exists?(:config) + +set(:additional_args) do + "#{default_additional_args}" +end unless exists?(:additional_args) + +set(:test_driver) do + "#{default_test_driver}" +end unless exists?(:test_driver) + +set(:test_args) do + "#{default_test_args}" +end unless exists?(:test_args) + +set(:client_multiplier) do + "#{default_client_multiplier}".to_i +end unless exists?(:client_multiplier) + +set :config_file, "#{config}".split('/')[-1] +set :config_option, \ + "--config=#{install_dir}/#{hypertable_version}/conf/#{config_file}" + + desc <<-DESC + + desc <<-DESC + Copies config file to installation on localhost. 
+ This task runs on localhost and copies the config file specified \ + by the variable 'config' (default=#{config}) \ + to the installation directory specified by the variable 'install_dir' \ + (default-#{install_dir}) + DESC +task :copy_config_local, :roles => :localhost do + run("rsync -e \"ssh -o StrictHostKeyChecking=no\" #{config} #{install_dir}/#{hypertable_version}/conf") +end + + desc <<-DESC + Copies config file to installation on all servers in cluster. + This task copies the dir\ + #{source_machine}:#{install_dir}/{#hypertable_version}/conf + to all machines in the cluster + DESC +task :push_config_all do + run <<-CMD + rsync -av -e "ssh -o StrictHostKeyChecking=no" --exclude=log --exclude=run --exclude=demo --exclude=fs --exclude=hyperspace #{source_machine}:#{install_dir}/#{hypertable_version}/conf/ #{install_dir}/#{hypertable_version}/conf + CMD +end + + desc <<-DESC + Copies config file to installation dir on localhost.\ + Then copies entire conf fir to all servers in cluster. + DESC +task :push_config do + copy_config_local + push_config_all +end + + desc <<-DESC + rsyncs installation directory to cluster. 
For each machine in the \ + cluster, his commannd rsyncs the installation from the source \ + installation machine specified by the variable 'source_machine' \ + (default=#{source_machine}) + DESC +task :rsync do + run <<-CMD + rsync -av -e "ssh -o StrictHostKeyChecking=no" --exclude=log --exclude=run --exclude=demo --exclude=fs --exclude=conf --exclude=hyperspace #{source_machine}:#{install_dir}/#{hypertable_version} #{install_dir} && + rsync -av -e "ssh -o StrictHostKeyChecking=no" --exclude=log --exclude=run --exclude=demo --exclude=fs --exclude=hyperspace #{source_machine}:#{install_dir}/#{hypertable_version}/conf/ #{install_dir}/#{hypertable_version}/conf + CMD +end + + desc <<-DESC + sets up the symbolic link 'current' in the installation area \ + to point to the directory of the current version + (default=#{hypertable_version}) + DESC +task :set_current, :roles => [:master, :hyperspace, :slave, :thriftbroker, :spare] do + run <<-CMD + cd #{install_dir} && + rm -f current && + ln -s #{hypertable_version} current + CMD +end + + desc <<-DESC + Distributes installation. This task rsyncs everything under\ + #{source_machine}:#{install_dir}/#{hypertable_version} to #{install_dir}\ + on all machines in the cluster + DESC +task :dist do + transaction do + rsync + end +end + + desc <<-DESC + Alias for install_package command + DESC +task :install_pkg do + install_package +end + + desc <<-DESC + rsyncs binary packages and installs on each machine in the cluster + DESC +task :install_package, :roles => [:master, :hyperspace, :slave, :thriftbroker, :spare] do + pkg_basename = File.basename(pkg) + pkg_basename =~ /#{pkg_regex}/ + pkg_type = $1 + + if (!supported_pkgs.has_key?(pkg_type)) + raise "Package file #{pkg} is of unsupported type. Expected one of #{supported_pkgs.keys.inspect}" + end + if (/-#{hypertable_version}-/ =~ pkg_basename).nil? 
+ raise "Package #{pkg} doesn't match version #{hypertable_version}" + end + run("rsync -e \"ssh -o StrictHostKeyChecking=no\" #{source_machine}:#{pkg} #{install_dir}/") + + if (pkg_type == "deb") + run("dpkg -i #{install_dir}/#{pkg_basename} && rm #{install_dir}/#{pkg_basename}") + else + run("rpm -ivh --replacepkgs --nomd5 #{install_dir}/#{pkg_basename} && rm #{install_dir}/#{pkg_basename}") + end +end + + desc <<-DESC + fhsize's the installations + DESC +task :fhsize do + transaction do + run <<-CMD + #{install_dir}/#{hypertable_version}/bin/fhsize.sh + CMD + end +end + + desc <<-DESC + Upgrades installation. Checks upgrade, fhsizes if needed + then copies hyperspace and the rangeserver + state in the run/ directory to new installation + DESC +task :upgrade do + transaction do + qualify_upgrade + upgrade_all + set_current + end +end + + desc <<-DESC + Verify that upgrade is OK. + DESC +task :qualify_upgrade, :roles => :source do + run <<-CMD + #{install_dir}/#{hypertable_version}/bin/upgrade-ok.sh \ + #{install_dir}/current #{hypertable_version} + CMD +end + + desc <<-DESC + Upgrades (copies or uses previous symlink) for "hyperspace", "conf", "run", "log" + and "fs" dirs from the current installation to + installation specified by the hypertable_version + (#{hypertable_version}) +DESC +task :upgrade_all, :roles => [:master, :hyperspace, :slave, :thriftbroker, :spare] do + run <<-CMD + #{install_dir}/#{hypertable_version}/bin/upgrade.sh \ + #{install_dir}/current #{hypertable_version} + CMD +end + +desc "Starts all processes." +task :start do + transaction do + start_hyperspace + start_master + start_slaves + start_master_thriftbroker + end +end + +desc "Starts hyperspace processes." +task :start_hyperspace, :roles => :hyperspace do + run <<-CMD + #{install_dir}/current/bin/start-hyperspace.sh \ + #{config_option} + CMD +end + +desc "Starts master processes." 
+task :start_master, :roles => :master do + run <<-CMD + #{install_dir}/current/bin/start-dfsbroker.sh #{dfs} \ + #{config_option} && + #{install_dir}/current/bin/start-master.sh #{config_option} && + #{install_dir}/current/bin/start-monitoring.sh + CMD +end + +desc "Starts ThriftBroker on master." +task :start_master_thriftbroker, :roles => :master do + run <<-CMD + #{install_dir}/current/bin/start-thriftbroker.sh \ + #{config_option} + CMD +end + +desc "Starts slave processes." +task :start_slaves, :roles => :slave do + run <<-CMD + #{install_dir}/current/bin/random-wait.sh 5 && + #{install_dir}/current/bin/start-dfsbroker.sh #{dfs} \ + #{config_option} && + #{install_dir}/current/bin/start-rangeserver.sh \ + #{config_option} && + #{install_dir}/current/bin/start-thriftbroker.sh \ + #{config_option} + CMD +end + +desc "Starts ThriftBroker processes." +task :start_thriftbrokers, :roles => :thriftbroker do + run <<-CMD + #{install_dir}/current/bin/random-wait.sh 5 && + #{install_dir}/current/bin/start-dfsbroker.sh #{dfs} \ + #{config_option} && + #{install_dir}/current/bin/start-thriftbroker.sh \ + #{config_option} + CMD +end + + +desc "Starts DFS brokers." +task :start_dfsbrokers, :roles => [:master, :slave] do + run "#{install_dir}/current/bin/start-dfsbroker.sh #{dfs} \ + #{config_option}" +end + +desc "Stops all servers." +task :stop do + transaction do + stop_master + stop_slaves + stop_hyperspace + stop_dfsbrokers + end +end + +desc "Stops DFS brokers." +task :stop_dfsbrokers, :roles => [:master, :slave] do + run <<-CMD + #{install_dir}/current/bin/stop-servers.sh #{additional_args} + CMD +end + +desc "Stops slave processes." +task :stop_slaves, :roles => :slave do + run <<-CMD + #{install_dir}/current/bin/stop-servers.sh --no-hyperspace --no-master --no-dfsbroker #{additional_args} + CMD +end + +desc "Stops master processes." 
+task :stop_master, :roles => :master do + run <<-CMD + #{install_dir}/current/bin/stop-servers.sh --no-hyperspace --no-rangeserver --no-dfsbroker #{additional_args} && + #{install_dir}/current/bin/stop-monitoring.sh + CMD +end + +desc "Stops hyperspace processes." +task :stop_hyperspace, :roles => :hyperspace do + run <<-CMD + #{install_dir}/current/bin/stop-hyperspace.sh + CMD +end + +desc "Stops ThriftBroker processes." +task :stop_thriftbrokers, :roles => :thriftbroker do + run <<-CMD + #{install_dir}/current/bin/stop-servers.sh --no-hyperspace --no-master --no-rangeserver + CMD +end + +desc "Cleans hyperspace & rangeservers, removing all tables." +task :cleandb do + transaction do + clean_master + clean_hyperspace + clean_slaves + end +end + +desc "Cleans master state but not hyperspace." +task :clean_master, :roles => :master do + run <<-CMD + #{install_dir}/current/bin/start-dfsbroker.sh #{dfs} \ + #{config_option} && \ + #{install_dir}/current/bin/clean-database.sh #{config_option} ; + CMD +end + +desc "Cleans hyperspace." +task :clean_hyperspace, :roles => :hyperspace do + run <<-CMD + #{install_dir}/current/bin/clean-hyperspace.sh + CMD +end + +desc "Cleans rangeservers and master state but not hyperspace." +task :clean_slaves, :roles => :slave do + run <<-CMD + #{install_dir}/current/bin/stop-servers.sh --no-hyperspace --no-master && + rm -rf #{install_dir}/current/run/* + CMD +end + +desc "Reports status for all processes." +task :status do + transaction do + dfs_status + master_status + hyperspace_status + rangeserver_status + end +end + +desc "Get status for dfs processes." +task :dfs_status, :roles => [:master, :slave] do + run <<-CMD + #{install_dir}/current/bin/ht serverup dfsbroker + CMD +end + +desc "Get status for Hypertable.Master process." +task :master_status, :roles => [:master] do + run <<-CMD + #{install_dir}/current/bin/ht serverup master + CMD +end + +desc "Get status for Hyperspace.Master process." 
+task :hyperspace_status, :roles => [:hyperspace] do + run <<-CMD + #{install_dir}/current/bin/ht serverup hyperspace + CMD +end + +desc "Get status for rangeserver processes." +task :rangeserver_status, :roles => [:slave] do + run <<-CMD + #{install_dir}/current/bin/ht serverup rangeserver + CMD +end + +set :default_dumpfile, "/tmp/rsdump.txt" + +set(:dumpfile) do + "#{default_dumpfile}" +end unless exists?(:dumpfile) + +desc "Run dump command on each rangeserver" +task :rangeserver_dump, :roles => [:slave] do + run <<-CMD + echo "dump NOKEYS '#{dumpfile}';" | #{install_dir}/current/bin/ht ht_rsclient --batch #{config_option} + CMD +end + + +if "#{test_driver}" == "hypertable" + set :thrift_broker_command, "#{install_dir}/current/bin/start-thriftbroker.sh #{config_option}" + set :start_test_client_command, "#{install_dir}/current/bin/start-test-client.sh --count #{client_multiplier} #{roles[:test_dispatcher].servers[0]}" + set :run_test_dispatcher_command, "#{install_dir}/current/bin/jrun --pidfile #{install_dir}/#{hypertable_version}/run/Hypertable.TestDispatcher.pid org.hypertable.examples.PerformanceTest.Dispatcher --driver=#{test_driver} --clients=#{roles[:test_client].servers.length*client_multiplier} #{test_args}" + set :stop_test_args, "" +elsif "#{test_driver}" == "hbase" + set :thrift_broker_command, "echo -n" + set :start_test_client_command, "#{install_dir}/current/bin/start-test-client.sh --jrun-opts \"--add-to-classpath #{hbase_home}/conf\" --count #{client_multiplier} #{roles[:test_dispatcher].servers[0]}" + set :run_test_dispatcher_command, "#{install_dir}/current/bin/jrun --pidfile #{install_dir}/#{hypertable_version}/run/Hypertable.TestDispatcher.pid --add-to-classpath #{hbase_home}/conf org.hypertable.examples.PerformanceTest.Dispatcher --driver=#{test_driver} --clients=#{roles[:test_client].servers.length*client_multiplier} #{test_args}" + set :stop_test_args, "--no-thriftbroker --no-dfsbroker" +else + set :thrift_broker_command, "echo Invalid 
test driver - #{test_driver}" + set :start_test_client_command, "echo Invalid test driver - #{test_driver}" + set :run_test_dispatcher_command, "echo Invalid test driver - #{test_driver}" + set :stop_test_args, "--no-thriftbroker --no-dfsbroker" +end + +desc "Starts test clients." +task :start_test_clients, :roles => :test_client do + run <<-CMD + #{install_dir}/current/bin/random-wait.sh 5 && + #{thrift_broker_command} && + #{start_test_client_command} + CMD +end + +desc "Run test dispatcher." +task :run_test_dispatcher, :roles => :test_dispatcher do + run <<-CMD + #{thrift_broker_command} && + #{run_test_dispatcher_command} + CMD +end + +desc "Stops test." +task :stop_test, :roles => [:test_client, :test_dispatcher] do + run <<-CMD + #{install_dir}/current/bin/stop-servers.sh --no-hyperspace --no-master --no-rangeserver #{stop_test_args} + CMD +end + +desc "Run test" +task :run_test do + transaction do + stop_test + start_test_clients + run_test_dispatcher + end +end + + diff --git a/templates/root/mesos-ec2/hypertable/hypertable.cfg b/templates/root/mesos-ec2/hypertable/hypertable.cfg new file mode 100644 index 00000000..b4d5b747 --- /dev/null +++ b/templates/root/mesos-ec2/hypertable/hypertable.cfg @@ -0,0 +1,42 @@ +# +# hypertable.cfg +# + +# HDFS Broker +HdfsBroker.Port=38030 +HdfsBroker.fs.default.name=hdfs://{{active_master}}:9010 +HdfsBroker.Workers=20 + +# Ceph Broker +CephBroker.Port=38030 +CephBroker.Workers=20 +CephBroker.MonAddr=10.0.1.245:6789 + +# Local Broker +DfsBroker.Local.Port=38030 +DfsBroker.Local.Root=fs/local + +# DFS Broker - for clients +DfsBroker.Host=localhost +DfsBroker.Port=38030 + +# Hyperspace +Hyperspace.Replica.Host={{active_master}} +Hyperspace.Replica.Port=38040 +Hyperspace.Replica.Dir=hyperspace +Hyperspace.Replica.Workers=20 + +# Hypertable.Master +Hypertable.Master.Port=38050 +Hypertable.Master.Workers=20 + + +# Hypertable.RangeServer +Hypertable.RangeServer.Port=38060 + +Hyperspace.KeepAlive.Interval=30000 
+Hyperspace.Lease.Interval=1000000 +Hyperspace.GracePeriod=200000 + +# ThriftBroker +ThriftBroker.Port=38080 diff --git a/templates/root/persistent-hdfs/conf/core-site.xml b/templates/root/persistent-hdfs/conf/core-site.xml new file mode 100644 index 00000000..b23aef25 --- /dev/null +++ b/templates/root/persistent-hdfs/conf/core-site.xml @@ -0,0 +1,23 @@ + + + + + + + + + hadoop.tmp.dir + /vol/persistent-hdfs + + + + fs.default.name + hdfs://{{active_master}}:9010 + + + + io.file.buffer.size + 65536 + + + diff --git a/templates/root/persistent-hdfs/conf/hadoop-env.sh b/templates/root/persistent-hdfs/conf/hadoop-env.sh new file mode 100755 index 00000000..b38ba018 --- /dev/null +++ b/templates/root/persistent-hdfs/conf/hadoop-env.sh @@ -0,0 +1,66 @@ +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. Required. +export JAVA_HOME=/usr/lib/jvm/java-1.6.0 + +# Extra Java CLASSPATH elements. Optional. +# export HADOOP_CLASSPATH= + +# The maximum amount of heap to use, in MB. Default is 1000. +export HADOOP_HEAPSIZE=1000 + +# Extra Java runtime options. Empty by default. 
+# export HADOOP_OPTS=-server +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true" + +# Command specific options appended to HADOOP_OPTS when specified +export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS" +export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS" +export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS" +export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS" +export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS" +# export HADOOP_TASKTRACKER_OPTS= +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +# export HADOOP_CLIENT_OPTS + +# Extra ssh options. Empty by default. +# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR" +export HADOOP_SSH_OPTS="-o ConnectTimeout=5" + +# Where log files are stored. $HADOOP_HOME/logs by default. +# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs +export HADOOP_LOG_DIR=/mnt/persistent-hdfs/logs + +# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. +# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves + +# host:path where hadoop code should be rsync'd from. Unset by default. +# export HADOOP_MASTER=master:/home/$USER/src/hadoop + +# Seconds to sleep between slave commands. Unset by default. This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. +# export HADOOP_SLAVE_SLEEP=0.1 + +# The directory where pid files are stored. /tmp by default. +export HADOOP_PID_DIR=/var/hadoop/persistent-hdfs/pids + +# A string representing this instance of hadoop. $USER by default. +# export HADOOP_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. 
+# export HADOOP_NICENESS=10 + +# Set hadoop user for CDH (which doesn't allow running as root) +export HADOOP_NAMENODE_USER=hadoop +export HADOOP_DATANODE_USER=hadoop +export HADOOP_SECONDARYNAMENODE_USER=hadoop +export HADOOP_JOBTRACKER_USER=hadoop +export HADOOP_TASKTRACKER_USER=hadoop + +ulimit -n 16000 diff --git a/templates/root/persistent-hdfs/conf/hdfs-site.xml b/templates/root/persistent-hdfs/conf/hdfs-site.xml new file mode 100644 index 00000000..ec000cb2 --- /dev/null +++ b/templates/root/persistent-hdfs/conf/hdfs-site.xml @@ -0,0 +1,76 @@ + + + + + + + dfs.replication + 2 + + + + dfs.block.size + 134217728 + + + + dfs.secondary.http.address + 0.0.0.0:60090 + + The secondary namenode http server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.datanode.address + 0.0.0.0:60010 + + The address where the datanode server will listen to. + If the port is 0 then the server will start on a free port. + + + + + dfs.datanode.http.address + 0.0.0.0:60075 + + The datanode http server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.datanode.ipc.address + 0.0.0.0:60020 + + The datanode ipc server address and port. + If the port is 0 then the server will start on a free port. + + + + + dfs.http.address + 0.0.0.0:60070 + + The address and the base port where the dfs namenode web ui will listen on. + If the port is 0 then the server will start on a free port. 
+ + + + + dfs.namenode.handler.count + 25 + + + + dfs.datanode.handler.count + 8 + + + + dfs.permissions + false + + + diff --git a/templates/root/persistent-hdfs/conf/mapred-site.xml b/templates/root/persistent-hdfs/conf/mapred-site.xml new file mode 100644 index 00000000..b1637dc8 --- /dev/null +++ b/templates/root/persistent-hdfs/conf/mapred-site.xml @@ -0,0 +1,29 @@ + + + + + + + + + mapred.job.tracker + {{active_master}}:9001 + + + + mapred.tasktracker.map.tasks.maximum + 4 + The maximum number of map tasks that will be run + simultaneously by a task tracker. + + + + + mapred.tasktracker.reduce.tasks.maximum + 2 + The maximum number of reduce tasks that will be run + simultaneously by a task tracker. + + + + diff --git a/templates/root/persistent-hdfs/conf/masters b/templates/root/persistent-hdfs/conf/masters new file mode 100644 index 00000000..d26a1943 --- /dev/null +++ b/templates/root/persistent-hdfs/conf/masters @@ -0,0 +1 @@ +{{active_master}} diff --git a/templates/root/persistent-hdfs/conf/slaves b/templates/root/persistent-hdfs/conf/slaves new file mode 100644 index 00000000..05f969e0 --- /dev/null +++ b/templates/root/persistent-hdfs/conf/slaves @@ -0,0 +1 @@ +{{slave_list}} diff --git a/templates/root/spark/conf/spark-env.sh b/templates/root/spark/conf/spark-env.sh new file mode 100755 index 00000000..2443a2d0 --- /dev/null +++ b/templates/root/spark/conf/spark-env.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# Set Spark environment variables for your site in this file. Some useful +# variables to set are: +# - MESOS_NATIVE_LIBRARY, to point to your Mesos native library (libmesos.so) +# - SCALA_HOME, to point to your Scala installation +# - SPARK_CLASSPATH, to add elements to Spark's classpath +# - SPARK_JAVA_OPTS, to add JVM options +# - SPARK_MEM, to change the amount of memory used per node (this should +# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g). +# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries. 
+ +export SCALA_HOME=/root/scala-2.9.2 +export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so + +# Set Spark's memory per machine; note that you can also comment this out +# and have the master's SPARK_MEM variable get passed to the workers. +export SPARK_MEM={{default_spark_mem}} + +# Set JVM options and Spark Java properties +SPARK_JAVA_OPTS+=" -Dspark.local.dir={{spark_local_dirs}}" +export SPARK_JAVA_OPTS