Permalink
Browse files

Updated MPI framework and included it in the distributions (contribut…

…ed by Harvey Feng, https://reviews.apache.org/r/4768).

git-svn-id: https://svn.apache.org/repos/asf/incubator/mesos/trunk@1359445 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent a12a92c commit e7bf6f6ed7361d55a46dd2d5476841963f5d58da @benh benh committed Jul 9, 2012
Showing with 324 additions and 287 deletions.
  1. +4 −0 Makefile.am
  2. +2 −0 configure.ac
  3. +0 −37 frameworks/mpi/README.txt
  4. +0 −15 frameworks/mpi/nmpiexec
  5. +0 −171 frameworks/mpi/nmpiexec.py
  6. +0 −49 frameworks/mpi/startmpd.py
  7. +0 −15 frameworks/mpi/startmpd.sh
  8. +59 −0 mpi/README
  9. +43 −0 mpi/mpiexec-mesos.in
  10. +216 −0 mpi/mpiexec-mesos.py
View
@@ -39,6 +39,10 @@ EXTRA_DIST += configure.amazon-linux-64 configure.centos-5.4-64 \
configure.macosx configure.ubuntu-lucid-64 configure.ubuntu-natty-64
+# MPI framework.
+EXTRA_DIST += mpi/README mpi/mpiexec-mesos.in mpi/mpiexec-mesos.py
+
+
if HAS_JAVA
maven-install:
@cd src && $(MAKE) $(AM_MAKEFLAGS) maven-install
View
@@ -100,6 +100,8 @@ AC_CONFIG_FILES([include/mesos/mesos.hpp])
AC_CONFIG_FILES([src/java/generated/org/apache/mesos/MesosNativeLibrary.java])
+AC_CONFIG_FILES([mpi/mpiexec-mesos], [chmod +x mpi/mpiexec-mesos])
+
AC_ARG_ENABLE([java],
AS_HELP_STRING([--disable-java],
View
@@ -1,37 +0,0 @@
-Mesos MPICH2 framework readme
---------------------------------------------
-
-Table of Contents:
-1) Installing MPICH2
-2) Running the Mesos MPICH2 framework
-
-=====================
-1) INSTALLING MPICH2:
-=====================
-This framework was developed using MPICH2 on Linux.
-
-You can install MPICH2 from scratch. You can get MPICH2 as well as installation directions here: http://www.mcs.anl.gov/research/projects/mpich2/
-
-I (Andy) installed MPICH2 using apt-get, but in Ubuntu, I had to add the Debian package mirror to my /etc/apt/sources.list file manually.
-
-I.e. I added 'deb http://ftp.de.debian.org/debian sid main' to the end of the file.
-
-I also had to muck with keys since 9.04 (Jaunty) Ubuntu is using secure apt, so I did:
-
-gpg --recv-keys 4D270D06F42584E6
-gpg --export 4D270D06F42584E6 | apt-key add -
-
-though, theoretically, the following should suffice, it did not for me:
-
-apt-get install debian-keyring debian-archive-keyring
-apt-key update
-
-=====================================
-2) RUNNING THE MESOS MPICH2 FRAMEWORK
-=====================================
-
-1. Start a Mesos master and slaves; see MESOS_HOME/QUICKSTART.txt for help
-with this.
-2. In the MESOS_HOME/frameworks/mpi directory run the nmpiexec script. Pass the
--h flag to see help options.
- Example: ./nmpiexec -m 104857600 1@127.0.1.1:59608 hostname
View
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-if [ "x$PYTHON" == "x" ]; then
- PYTHON=python
- if [ "`uname`" == "SunOS" ]; then
- PYTHON=python2.6
- fi
-fi
-
-if [ "x$MESOS_HOME" == "x" ]; then
- MESOS_HOME="$(dirname $0)/../.."
-fi
-
-export PYTHONPATH=$MESOS_HOME/lib/python:$MESOS_HOME/third_party/protobuf-2.3.0/python:$PYTHONPATH
-exec $PYTHON "$(dirname $0)/nmpiexec.py" $@
View
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-import mesos
-import mesos_pb2
-import os
-import sys
-import time
-import re
-import threading
-
-from optparse import OptionParser
-from subprocess import *
-
-TOTAL_TASKS = 1
-MPI_TASK = ""
-MPD_PID = ""
-CPUS = 1
-MEM = 1024
-
-def mpiexec(driver):
- print "We've launched all our MPDs; waiting for them to come up"
- while countMPDs() <= TOTAL_TASKS:
- print "...waiting on MPD(s)..."
- time.sleep(1)
- print "Got "+str(TOTAL_TASKS)+" mpd slots, running mpiexec"
- try:
- print "Running: "+"mpiexec -n "+str(TOTAL_TASKS)+" "+MPI_TASK
- os.system("mpiexec -1 -n "+str(TOTAL_TASKS)+" "+MPI_TASK)
- except OSError,e:
- print >>sys.stderr, "Error executing mpiexec"
- print >>sys.stderr, e
- exit(2)
- print "mpiexec completed, calling mpdexit "+MPD_PID
- call(["mpdexit",MPD_PID])
- time.sleep(1)
- driver.stop()
-
-class MyScheduler(mesos.Scheduler):
- def __init__(self, ip, port):
- self.ip = ip
- self.port = port
- self.tasksLaunched = 0
- self.tasksFinished = 0
-
- def getFrameworkName(self, driver):
- return "Mesos MPI Framework"
-
- def getExecutorInfo(self, driver):
- execPath = os.path.join(os.getcwd(), "startmpd.sh")
- initArg = ip + ":" + port
- execInfo = mesos_pb2.ExecutorInfo()
- execInfo.executor_id.value = "default"
- execInfo.uri = execPath
- execInfo.data = initArg
- return execInfo
-
- def registered(self, driver, fid):
- print "Mesos MPI scheduler and mpd running at "+self.ip+":"+self.port
-
- def resourceOffer(self, driver, oid, offers):
- print "Got offer %s" % oid.value
- tasks = []
- if self.tasksLaunched == TOTAL_TASKS:
- print "Rejecting permanently because we have already started"
- driver.replyToOffer(oid, tasks, {"timeout": "-1"})
- return
- for offer in offers:
- print "Considering slot on %s" % offer.hostname
- cpus = 0
- mem = 0
- for r in offer.resources:
- if r.name == "cpus":
- cpus = r.scalar.value
- elif r.name == "mem":
- mem = r.scalar.value
- if cpus < CPUS or mem < MEM:
- print "Rejecting slot due to too few resources"
- elif self.tasksLaunched < TOTAL_TASKS:
- tid = self.tasksLaunched
- print "Accepting slot to start mpd %d" % tid
- task = mesos_pb2.TaskDescription()
- task.task_id.value = str(tid)
- task.slave_id.value = offer.slave_id.value
- task.name = "task %d" % tid
- cpus = task.resources.add()
- cpus.name = "cpus"
- cpus.type = mesos_pb2.Resource.SCALAR
- cpus.scalar.value = CPUS
- mem = task.resources.add()
- mem.name = "mem"
- mem.type = mesos_pb2.Resource.SCALAR
- mem.scalar.value = MEM
- tasks.append(task)
- self.tasksLaunched += 1
- else:
- print "Rejecting slot because we've launched enough tasks"
- print "Replying to offer!"
- driver.replyToOffer(oid, tasks, {"timeout": "-1"})
- if self.tasksLaunched == TOTAL_TASKS:
- threading.Thread(target = mpiexec, args=[driver]).start()
-
- def statusUpdate(self, driver, update):
- print "Task %s in state %s" % (update.task_id.value, update.state)
- if (update.state == mesos_pb2.TASK_FINISHED or
- update.state == mesos_pb2.TASK_FAILED or
- update.state == mesos_pb2.TASK_KILLED or
- update.state == mesos_pb2.TASK_LOST):
- print "A task finished unexpectedly, calling mpdexit "+MPD_PID
- call(["mpdexit",MPD_PID])
- driver.stop()
-
-def countMPDs():
- try:
- mpdtraceout = Popen("mpdtrace -l", shell=True, stdout=PIPE).stdout
- count = 0
- for line in mpdtraceout:
- count += 1
-
- mpdtraceout.close()
- return count
- except OSError,e:
- print >>sys.stderr, "Error starting mpd or mpdtrace"
- print >>sys.stderr, e
- exit(2)
-
-def parseIpPort(s):
- ba = re.search("_([^ ]*) \(([^)]*)\)", s)
- ip = ba.group(2)
- port = ba.group(1)
- return (ip,port)
-
-if __name__ == "__main__":
- parser = OptionParser(usage="Usage: %prog [options] mesos_master mpi_program")
- parser.add_option("-n","--num",
- help="number of slots/mpd:s to allocate (default 1)",
- dest="num", type="int", default=1)
- parser.add_option("-c","--cpus",
- help="number of cpus per slot (default 1)",
- dest="cpus", type="int", default=CPUS)
- parser.add_option("-m","--mem",
- help="number of MB of memory per slot (default 1GB)",
- dest="mem", type="int", default=MEM)
-
- (options,args)=parser.parse_args()
- if len(args)<2:
- print >>sys.stderr, "At least two parameters required."
- print >>sys.stderr, "Use --help to show usage."
- exit(2)
-
- TOTAL_TASKS = options.num
- CPUS = options.cpus
- MEM = options.mem
- MPI_TASK = " ".join(args[1:])
-
- print "Connecting to mesos master %s" % args[0]
-
- try:
- call(["mpd","--daemon"])
- mpdtraceout = Popen("mpdtrace -l", shell=True, stdout=PIPE).stdout
- traceline = mpdtraceout.readline()
- except OSError,e:
- print >>sys.stderr, "Error starting mpd or mpdtrace"
- print >>sys.stderr, e
- exit(2)
-
- (ip,port) = parseIpPort(traceline)
-
- MPD_PID = traceline.split(" ")[0]
- print "MPD_PID is %s" % MPD_PID
-
- sched = MyScheduler(ip, port)
- mesos.MesosSchedulerDriver(sched, args[0]).run()
View
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-import mesos
-import mesos_pb2
-import sys
-import time
-import os
-import atexit
-
-from subprocess import *
-
-def cleanup():
- try:
- # TODO(*): This will kill ALL mpds...oops.
- print "cleanup"
- os.waitpid(Popen("pkill -f /usr/local/bin/mpd", shell=True).pid, 0)
- except Exception, e:
- print e
- None
-
-class MyExecutor(mesos.Executor):
- def init(self, driver, arg):
- [ip,port] = arg.data.split(":")
- self.ip = ip
- self.port = port
-
- def launchTask(self, driver, task):
- print "Running task %s" % task.task_id.value
- update = mesos_pb2.TaskStatus()
- update.task_id.value = task.task_id.value
- update.state = mesos_pb2.TASK_RUNNING
- driver.sendStatusUpdate(update)
- Popen("mpd -n -h "+self.ip+" -p "+self.port, shell=True)
-
- def killTask(self, driver, tid):
- # TODO(*): Kill only one of the mpd's!
- sys.exit(1)
-
- def shutdown(self, driver):
- print "shutdown"
- cleanup()
-
- def error(self, driver, code, message):
- print "Error: %s" % message
-
-if __name__ == "__main__":
- print "Starting executor"
- atexit.register(cleanup)
- executor = MyExecutor()
- mesos.MesosExecutorDriver(executor).run()
View
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-if [ "x$PYTHON" == "x" ]; then
- PYTHON=python
- if [ "`uname`" == "SunOS" ]; then
- PYTHON=python2.6
- fi
-fi
-
-if [ "x$MESOS_HOME" == "x" ]; then
- MESOS_HOME="$(dirname $0)/../.."
-fi
-
-export PYTHONPATH=$MESOS_HOME/lib/python:$MESOS_HOME/third_party/protobuf-2.3.0/python:$PYTHONPATH
-exec $PYTHON "$(dirname $0)/startmpd.py" $@
View
@@ -0,0 +1,59 @@
+Mesos MPICH2 framework readme
+--------------------------------------------
+
+Table of Contents:
+1) Installing MPICH2
+2) Running the Mesos MPICH2 framework
+
+=====================
+1) INSTALLING MPICH2:
+=====================
+- This framework was developed for MPICH2 1.2 (mpd was deprecated
+ starting with 1.3) on Linux (Ubuntu 11.10) and OS X Lion.
+
+- You can install MPICH2 from scratch. You can get MPICH2 as well as
+ installation directions here:
+ http://www.mcs.anl.gov/research/projects/mpich2/ . This tutorial
+ follows those installation directions. Unpack the tar.gz and...
+
+- To use MPI with Mesos, make sure to have MPICH2 installed on every
+ machine in your cluster.
+
+Setting up:
+-> Install and configure:
+mac : ./configure --prefix=/Users/_your_username_/mpich2-install
+ubuntu : ./configure --prefix=/home/_your_username_/mpich2-install
+ Then...
+ sudo make
+ sudo make install
+
+
+-> Optional: add mpich binaries to PATH. Alternatively, specify the path
+ to the installed MPICH2 binaries using mpiexec-mesos's '--path' option
+mac : sudo vim ~/.bash_profile
+ export PATH=/Users/_your_username_/mpich2-install/bin:$PATH
+ubuntu : sudo vim ~/.bashrc
+ export PATH=/home/_your_username_/mpich2-install/bin:$PATH
+
+-> Create .mpd.conf file in home directory:
+ echo "secretword=nil" > ~/.mpd.conf
+ chmod 600 ~/.mpd.conf
+
+-> Check installation - these should all return the paths set above
+ which mpd
+ which mpiexec
+ which mpirun
+
+
+=====================================
+2) RUNNING THE MESOS MPICH2 FRAMEWORK
+=====================================
+
+Using/testing mpiexec-mesos:
+-> Start a Mesos master and slaves
+
+-> How to run a Hello, World! program (pass the -h flag to see help options):
+ mpicc helloworld.c -o helloworld
+ ./mpiexec-mesos 127.0.0.1:5050 ./helloworld
+ Paths to mesos, protobuf, and distribute eggs can be specified by setting
+ respective environment variables in mpiexec-mesos.
Oops, something went wrong.

0 comments on commit e7bf6f6

Please sign in to comment.