This repository has been archived by the owner on Nov 23, 2017. It is now read-only.

Commit

Setup new directory structure that can become /root/spark-ec2 on the AMI
shivaram committed Jan 27, 2013
1 parent f79e072 commit bf8b415
Showing 36 changed files with 1,650 additions and 0 deletions.
1 change: 1 addition & 0 deletions copy-dir
21 changes: 21 additions & 0 deletions copy-dir.sh
@@ -0,0 +1,21 @@
#!/bin/bash

if [[ "$#" != "1" ]] ; then
echo "Usage: copy-dir <dir>"
exit 1
fi

DIR=`readlink -f "$1"`
DIR=`echo "$DIR"|sed 's@/$@@'`
DEST=`dirname "$DIR"`

SLAVES=`cat /root/spark-ec2/slaves`

SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

echo "RSYNC'ing $DIR to slaves..."
for slave in $SLAVES; do
  echo $slave
  rsync -e "ssh $SSH_OPTS" -az "$DIR" "$slave:$DEST" & sleep 0.5
done
wait
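
A typical invocation, matching how ephemeral-hdfs/setup.sh below pushes its configuration directory out to the cluster:

# Sync a directory to the same path on every host listed in /root/spark-ec2/slaves
/root/spark-ec2/copy-dir /root/ephemeral-hdfs/conf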
19 changes: 19 additions & 0 deletions create-swap.sh
@@ -0,0 +1,19 @@
#!/bin/bash

if [ $# -lt 1 ]; then
  echo "Usage: create-swap <amount of MB>"
  exit 1
fi

if [ -e /mnt/swap ]; then
  echo "/mnt/swap already exists" >&2
  exit 1
fi

SWAP_MB=$1
if [[ "$SWAP_MB" != "0" ]]; then
dd if=/dev/zero of=/mnt/swap bs=1M count=$SWAP_MB
mkswap /mnt/swap
swapon /mnt/swap
echo "Added $SWAP_MB MB swap file /mnt/swap"
fi
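
A minimal usage sketch, assuming create-swap.sh is installed under /root/spark-ec2 and SWAP_MB comes from ec2-variables.sh below (a value of 0 skips swap creation entirely):

source /root/spark-ec2/ec2-variables.sh
# With SWAP_MB=1024 this creates, formats and enables a 1 GB swap file at /mnt/swap
/root/spark-ec2/create-swap.sh $SWAP_MB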
58 changes: 58 additions & 0 deletions deploy_templates.py
@@ -0,0 +1,58 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import with_statement

import os
import sys

# Deploy the configuration file templates in the spark-ec2/templates directory
# to the root filesystem, substituting variables such as the master hostname,
# ZooKeeper URL, etc as read from the environment.

# Find system memory in KB and compute Spark's default limit from that
system_ram_kb = int(
    os.popen("cat /proc/meminfo | grep MemTotal | awk '{print $2}'")
    .read().strip())
system_ram_mb = system_ram_kb / 1024
if system_ram_mb > 20*1024:
    # Leave 3 GB for the OS, HDFS and buffer cache
    spark_mb = system_ram_mb - 3 * 1024
elif system_ram_mb > 10*1024:
    # Leave 2 GB for the OS & co.
    spark_mb = system_ram_mb - 2 * 1024
else:
    # Leave 1.3 GB for the OS & co. Note that this must be more than
    # 1 GB because Mesos leaves 1 GB free and requires 32 MB/task.
    spark_mb = max(512, system_ram_mb - 1300)

template_vars = {
    "master_list": os.getenv("MASTERS"),
    "active_master": os.getenv("MASTERS").split("\n")[0],
    "slave_list": os.getenv("SLAVES"),
    "zoo_list": os.getenv("MESOS_ZOO_LIST"),
    "cluster_url": os.getenv("MESOS_CLUSTER_URL"),
    "hdfs_data_dirs": os.getenv("HDFS_DATA_DIRS"),
    "mapred_local_dirs": os.getenv("MAPRED_LOCAL_DIRS"),
    "spark_local_dirs": os.getenv("SPARK_LOCAL_DIRS"),
    "default_spark_mem": "%dm" % spark_mb
}

template_dir="/root/spark-ec2/templates"

for path, dirs, files in os.walk(template_dir):
    if path.find(".svn") == -1:
        dest_dir = os.path.join('/', path[len(template_dir):])
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        for filename in files:
            if filename[0] not in '#.~' and filename[-1] != '~':
                dest_file = os.path.join(dest_dir, filename)
                with open(os.path.join(path, filename)) as src:
                    with open(dest_file, "w") as dest:
                        print "Configuring " + dest_file
                        text = src.read()
                        for key in template_vars:
                            text = text.replace("{{" + key + "}}", template_vars[key])
                        dest.write(text)
                        dest.close()
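
As a worked example of the memory formula above: a 16 GB instance has system_ram_mb = 16384, which falls into the middle branch, so spark_mb = 16384 - 2048 = 14336 and default_spark_mem becomes "14336m". A hypothetical template (the file name and variable names are illustrative, not part of this commit) would be copied to the root filesystem with its placeholders substituted, with the master hostname taken from ec2-variables.sh below:

# /root/spark-ec2/templates/root/spark/conf/spark-env.sh (hypothetical template)
export MASTER={{active_master}}
export SPARK_MEM={{default_spark_mem}}

# Resulting /root/spark/conf/spark-env.sh after deploy_templates.py runs
export MASTER=ec2-107-22-79-196.compute-1.amazonaws.com
export SPARK_MEM=14336m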
21 changes: 21 additions & 0 deletions ec2-variables.sh
@@ -0,0 +1,21 @@
#!/bin/bash

# These variables should be set before running setup.sh.
export MASTERS="ec2-107-22-79-196.compute-1.amazonaws.com"
export SLAVES="ec2-107-22-111-247.compute-1.amazonaws.com"
export HDFS_DATA_DIRS="/mnt/ephemeral-hdfs/data,/mnt2/ephemeral-hdfs/data"
export MAPRED_LOCAL_DIRS="/mnt/hadoop/mrlocal,/mnt2/hadoop/mrlocal"

export MESOS_ZOO_LIST="none"
export SWAP_MB=1024

# Supported modules
# spark
# ephemeral-hdfs
# persistent-hdfs
# mesos
export MODULES="spark ephemeral-hdfs mesos"

# Other variables used in scripts
# export SPARK_LOCAL_DIRS
# export MESOS_DOWNLOAD_METHOD
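
A sketch of how a driver script might consume these variables; the loop below is illustrative and is not the actual setup.sh referenced above:

source /root/spark-ec2/ec2-variables.sh
# Each module named in MODULES (spark, ephemeral-hdfs, persistent-hdfs, mesos)
# is expected to provide its own setup.sh, as the directories below do.
for module in $MODULES; do
  /root/spark-ec2/$module/setup.sh
done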
21 changes: 21 additions & 0 deletions ephemeral-hdfs/setup-slave.sh
@@ -0,0 +1,21 @@
#!/bin/bash

# Setup ephemeral-hdfs
mkdir -p /mnt/ephemeral-hdfs/logs
mkdir -p /mnt/hadoop-logs

# Create Hadoop and HDFS directories in a given parent directory
# (for example /mnt, /mnt2, and so on)
function create_hadoop_dirs {
  location=$1
  if [[ -e $location ]]; then
    mkdir -p $location/ephemeral-hdfs $location/hadoop/tmp
    mkdir -p $location/hadoop/mrlocal $location/hadoop/mrlocal2
  fi
}

# Set up Hadoop and Mesos directories in /mnt, /mnt2, /mnt3 and /mnt4
create_hadoop_dirs /mnt
create_hadoop_dirs /mnt2
create_hadoop_dirs /mnt3
create_hadoop_dirs /mnt4
21 changes: 21 additions & 0 deletions ephemeral-hdfs/setup.sh
@@ -0,0 +1,21 @@
#!/bin/bash

EPHEMERAL_HDFS=/root/ephemeral-hdfs
pushd $EPHEMERAL_HDFS

source ./setup-slave.sh

for node in $SLAVES $OTHER_MASTERS; do
  echo $node
  ssh -t $SSH_OPTS root@$node "/root/spark-ec2/ephemeral-hdfs/setup-slave.sh" & sleep 0.3
done

/root/spark-ec2/copy-dir $EPHEMERAL_HDFS/conf

echo "Formatting ephemeral HDFS namenode..."
$EPHEMERAL_HDFS/bin/hadoop namenode -format

echo "Starting ephemeral HDFS..."
$EPHEMERAL_HDFS/bin/start-dfs.sh

popd
17 changes: 17 additions & 0 deletions mesos/compute_cluster_url.py
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys

# Get the Mesos cluster URL, assuming the EC2 script environment variables
# are all available.

active_master = os.getenv("MASTERS").split("\n")[0]
zoo_list = os.getenv("MESOS_ZOO_LIST")

if zoo_list.strip() == "NONE":
    print active_master + ":5050"
else:
    zoo_nodes = zoo_list.strip().split("\n")
    print "zoo://" + ",".join(["%s:2181/mesos" % node for node in zoo_nodes])
24 changes: 24 additions & 0 deletions mesos/redeploy-mesos
@@ -0,0 +1,24 @@
#!/bin/bash
cd /root/spark-ec2

MASTERS=`cat masters`
NUM_MASTERS=`cat masters | wc -l`
SLAVES=`cat slaves`

SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

if [[ $NUM_MASTERS -gt 1 ]]; then
  echo "RSYNC'ing /root/mesos to masters..."
  for master in $MASTERS; do
    echo $master
    rsync -e "ssh $SSH_OPTS" -az --exclude '*.d' --exclude '*.o' --exclude '*.cpp' --exclude '*.hpp' --exclude '*.pyc' --exclude 'mesos/frameworks/hadoop-0.20.0/logs/*' --exclude 'mesos/work' --exclude 'mesos/logs' --exclude 'mesos/test_output' /root/mesos $master:/root & sleep 0.3
  done
  wait
fi

echo "RSYNC'ing /root/mesos to slaves..."
for slave in $SLAVES; do
  echo $slave
  rsync -e "ssh $SSH_OPTS" -az --exclude '*.d' --exclude '*.o' --exclude '*.cpp' --exclude '*.hpp' --exclude '*.pyc' --exclude 'mesos/frameworks/hadoop-0.20.0/logs/*' --exclude 'mesos/work' --exclude 'mesos/logs' --exclude 'mesos/test_output' /root/mesos $slave:/root & sleep 0.3
done
wait
96 changes: 96 additions & 0 deletions mesos/setup.sh
@@ -0,0 +1,96 @@
#!/bin/bash

echo "$MESOS_ZOO_LIST" > zoo
ZOOS=`cat zoo`

if [[ $ZOOS = *NONE* ]]; then
  NUM_ZOOS=0
  ZOOS=""
else
  NUM_ZOOS=`cat zoo | wc -l`
fi

if [[ $NUM_ZOOS != 0 ]] ; then
  echo "SSH'ing to ZooKeeper server(s) to approve keys..."
  zid=1
  for zoo in $ZOOS; do
    echo $zoo
    ssh $SSH_OPTS $zoo echo -n \; mkdir -p /tmp/zookeeper \; echo $zid \> /tmp/zookeeper/myid &
    zid=$(($zid+1))
    sleep 0.3
  done
fi

mkdir -p /mnt/mesos-logs
mkdir -p /mnt/mesos-work

for node in $SLAVES $OTHER_MASTERS; do
  ssh -t $SSH_OPTS root@$node "mkdir -p /mnt/mesos-logs /mnt/mesos-work" & sleep 0.3
done

DOWNLOADED=0

if [[ "$MESOS_DOWNLOAD_METHOD" == "git" ]] ; then
# change git's ssh command so it does not ask to accept a keys
export GIT_SSH=/root/spark-ec2/ssh-no-keychecking.sh
REPOSITORY=git://github.com/apache/mesos.git
echo "Checking out Mesos from $REPOSITORY"
pushd /root > /dev/null 2>&1
rm -rf mesos mesos.tgz
# Set git SSH command to a script that uses -o StrictHostKeyChecking=no
git clone $REPOSITORY mesos
pushd mesos 2>&1
git checkout -b $BRANCH --track origin/$BRANCH
popd > /dev/null 2>&1
popd > /dev/null 2>&1
DOWNLOADED=1
fi

# Build Mesos if we downloaded it
if [[ "$DOWNLOADED" == "1" ]] ; then
echo "Building Mesos..."
mkdir /root/mesos/build
pushd /root/mesos/build > /dev/null 2>&1
./configure.amazon-linux-64
make clean
make
popd > /dev/null 2>&1
if [ -d /root/spark ] ; then
echo "Building Spark..."
pushd /root/spark > /dev/null 2>&1
git pull
sbt/sbt clean compile
popd > /dev/null 2>&1
fi
echo "Building Hadoop framework..."
pushd /root/mesos/build > /dev/null 2>&1
make hadoop
rm -fr /root/hadoop-mesos
mv /root/mesos/build/hadoop/hadoop-0.20.205.0 /root/hadoop-mesos
popd > /dev/null 2>&1
fi

echo "Setting up Hadoop framework config files..."
cp hadoop-framework-conf/* /root/hadoop-mesos/conf

echo "Deploying Hadoop framework config files..."
/root/spark-ec2/copy-dir /root/hadoop-mesos/conf

echo "Redeploying /root/mesos..."
./redeploy-mesos

if [[ $NUM_ZOOS != 0 ]]; then
  echo "Starting ZooKeeper quorum..."
  for zoo in $ZOOS; do
    ssh $SSH_OPTS $zoo "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null" & sleep 0.1
  done
  wait
  sleep 5
fi

echo "Stopping any existing Mesos cluster..."
./stop-mesos
sleep 2

echo "Starting Mesos cluster..."
./start-mesos
54 changes: 54 additions & 0 deletions mesos/start-mesos
@@ -0,0 +1,54 @@
#!/bin/bash
cd /root/spark-ec2

MASTERS=`cat masters`
ACTIVE_MASTER=`cat masters | head -1`
SLAVES=`cat slaves`
ZOOS=`cat zoo`


if [[ $ZOOS = *NONE* ]]; then
  NUM_ZOOS=0
else
  NUM_ZOOS=`cat zoo | wc -l`
fi

SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

cluster_url=`cat cluster-url`

echo "Running with cluster URL: "$cluster_url

if [[ $NUM_ZOOS != 0 ]]; then
  masterid=1
  for master in $MASTERS; do
    echo "Starting master $masterid on $master"
    ssh $SSH_OPTS $master "/root/spark-ec2/mesos/mesos-daemon mesos-master -p 5050 -u $cluster_url $@ </dev/null >/dev/null" & sleep 0.3
    masterid=$(($masterid+1))
  done
  wait
else
  echo "Starting master on $ACTIVE_MASTER"
  ssh $SSH_OPTS $ACTIVE_MASTER "/root/spark-ec2/mesos/mesos-daemon mesos-master --failover_timeout=1 -p 5050 $@ </dev/null >/dev/null"
fi

sleep 5

for slave in $SLAVES; do
  echo "Starting slave on $slave"
  ssh $SSH_OPTS $slave "/root/spark-ec2/mesos/mesos-daemon mesos-slave -m ${cluster_url} </dev/null >/dev/null" &
  sleep 0.3
done
wait

if [[ $NUM_ZOOS != 0 ]]; then
  echo "ZooKeeper is running at"
  for zoo in $ZOOS; do
    echo " $zoo:2181"
  done
fi

echo "Everything's started! You can view the master Web UI at"
for master in $MASTERS; do
  echo " http://$master:8080"
done
21 changes: 21 additions & 0 deletions mesos/stop-mesos
@@ -0,0 +1,21 @@
#!/bin/bash
cd /root/spark-ec2

MASTERS=`cat masters`
SLAVES=`cat slaves`

SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5"

for slave in $SLAVES; do
  echo "Stopping slave on $slave"
  ssh $SSH_OPTS $slave pkill mesos-slave &
  sleep 0.1
done
wait

for master in $MASTERS; do
  echo "Stopping master on $master"
  ssh $SSH_OPTS $master pkill mesos-master &
  sleep 0.1
done
wait
