Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

More initial work (master mostly works)

  • Loading branch information...
commit fe5cbff150259966c660e6930e0a1fad551a2ab2 1 parent 32a498f
@dcramer authored
View
4 README.rst
@@ -6,7 +6,7 @@ Taskmaster
Create an iterator, and callback::
# mymodule/job.py
- def get_jobs(last_job=0):
+ def get_jobs(last=0):
# last would be sent if state was resumed
# from a previous run
for i in xrange(last, 100000000):
@@ -22,4 +22,4 @@ Spawn a master::
Spawn slaves::
- tm-slave mymodule.job:handle_job --host=127.0.0.1:3050 --key=foobar
+ tm-slave mymodule.job:handle_job --host=127.0.0.1:3050 --key=foobar --procs=1 --threads=1
View
12 setup.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
-from setuptools import setup
+from setuptools import setup, find_packages
setup(
name="taskmaster",
@@ -10,14 +10,18 @@
author="David Cramer",
author_email="dcramer@gmail.com",
url="https://github.com/dcramer/taskmaster",
- packages=["taskmaster", "taskmaster.master", "taskmaster.slave"],
+ packages=find_packages("src/taskmaster"),
package_dir={'': 'src'},
entry_points={
'console_scripts': [
- 'tm-master = taskmaster.master:main',
- 'tm-slave = taskmaster.slave:main',
+ 'tm-master = taskmaster.cli.master:main',
+ 'tm-slave = taskmaster.cli.slave:main',
],
},
+ tests_require=[
+ 'unittest2',
+ 'Nose>=1.0',
+ ],
classifiers=[
"Environment :: Console",
"Intended Audience :: Developers",
View
0  src/taskmaster/__init__.py
No changes.
View
0  src/taskmaster/cli/__init__.py
No changes.
View
84 src/taskmaster/cli/master.py
@@ -0,0 +1,84 @@
+"""
+taskmaster.cli.master
+~~~~~~~~~~~~~~~~~~~~~
+
+:copyright: (c) 2010 DISQUS.
+:license: Apache License 2.0, see LICENSE for more details.
+"""
+
+from multiprocessing.managers import BaseManager
+from threading import Thread
+import Queue
+import time
+
+
class QueueServer(Thread):
    """Thread that serves a multiprocessing manager in the background.

    ``run`` obtains the server object from ``manager.get_server()`` and
    blocks in its ``serve_forever()``; ``shutdown`` asks that server to stop.
    """

    def __init__(self, manager):
        Thread.__init__(self)
        self.manager = manager
        # Populated by run(); stays None until the thread has started.
        self.server = None

    def run(self):
        """Create the manager's server and serve until it stops."""
        self.server = self.manager.get_server()
        self.server.serve_forever()

    def shutdown(self):
        """Ask the serving loop to stop; a no-op if nothing is served yet.

        ``multiprocessing.managers.Server.shutdown`` is the handler for a
        client's shutdown request and requires a connection argument, so
        calling it with no arguments raises ``TypeError``. Instead, set the
        server's ``stop_event`` when it exposes one. Servers without that
        attribute are left running, so callers should run this thread as a
        daemon if the process must be able to exit.
        """
        server = self.server
        if server is None:
            return

        stop_event = getattr(server, 'stop_event', None)
        if stop_event is not None:
            stop_event.set()
+
+
class QueueManager(BaseManager):
    """Manager subclass on which ``run`` registers the ``get_queue`` callable."""
+
+
def sample(last=0):
    """Return the integers from ``last`` up to, but excluding, 1000000."""
    upper_bound = 1000000
    return xrange(last, upper_bound)
+
+
def run(target, size=10000, host='0.0.0.0:3050', key='taskmaster'):
    """Publish the jobs produced by ``target`` on a manager-served queue.

    :param target: ``path.to.module:function_name`` naming a callable that
                   returns an iterable of jobs.
    :param size: maximum number of jobs held in the queue at once.
    :param host: ``address:port`` the queue manager listens on.
    :param key: authentication key handed to the manager.
    :raises ValueError: if ``target`` is not in ``module:function`` form.
    """
    # Resolve the job source first so that a malformed target fails before
    # a server thread has been started.
    try:
        mod_path, func_name = target.split(':', 1)
    except ValueError:
        raise ValueError('target must be in form of `path.to.module:function_name`')

    # The import level is left at the interpreter default; an explicit -1
    # is only accepted by Python 2.
    module = __import__(mod_path, {}, {}, [func_name])
    callback = getattr(module, func_name)

    host, port = host.split(':')

    queue = Queue.Queue(maxsize=size)

    QueueManager.register('get_queue', callable=lambda: queue)

    manager = QueueManager(address=(host, int(port)), authkey=key)
    server = QueueServer(manager)
    server.daemon = True
    server.start()

    # last=<last serialized job>
    kwargs = {}

    # put() blocks while the queue is full, so production is throttled to
    # the rate at which jobs are taken off the queue.
    for job in callback(**kwargs):
        queue.put(job)

    # Poll the queue *instance* (not the Queue module) until it is drained.
    # An empty queue only means every job was fetched, not that it finished.
    while not queue.empty():
        time.sleep(0.1)

    server.shutdown()
+
+
def main():
    """Command-line entry point for ``tm-master``.

    Expects exactly one positional argument (the job callback target) and
    forwards the parsed ``--host``, ``--size`` and ``--key`` options to
    ``run``. Exits with status 1 after printing usage otherwise.
    """
    import optparse
    import sys

    parser = optparse.OptionParser()
    parser.add_option("--host", dest="host", default='0.0.0.0:3050')
    parser.add_option("--size", dest="size", default='10000', type=int)
    parser.add_option("--key", dest="key", default='taskmaster')
    options, positional = parser.parse_args()

    if len(positional) == 1:
        # The exit status is whatever run() returns.
        sys.exit(run(positional[0], **vars(options)))

    print('Usage: tm-master <callback>')
    sys.exit(1)


if __name__ == '__main__':
    main()
View
51 src/taskmaster/cli/slave.py
@@ -0,0 +1,51 @@
+"""
+taskmaster.cli.slave
+~~~~~~~~~~~~~~~~~~~~
+
+:copyright: (c) 2010 DISQUS.
+:license: Apache License 2.0, see LICENSE for more details.
+"""
+
+from multiprocessing.managers import BaseManager
+
+
class QueueManager(BaseManager):
    """Manager subclass on which ``run`` registers the ``get_queue`` name."""
+
+
def run(target, host='0.0.0.0:3050', key='taskmaster', threads=1):
    """Connect to a master's queue and start a pool of worker threads.

    :param target: ``path.to.module:function_name`` naming the job callback.
    :param host: ``address:port`` of the master's queue manager.
    :param key: authentication key handed to the manager.
    :param threads: number of worker threads to start.
    :raises ValueError: if ``target`` is not in ``module:function`` form.
    """
    # ThreadPool is used below but was never imported by this module.
    from taskmaster.workers import ThreadPool

    # Resolve the callback before connecting so a bad target fails early,
    # with the same message the master uses.
    try:
        mod_path, func_name = target.split(':', 1)
    except ValueError:
        raise ValueError('target must be in form of `path.to.module:function_name`')

    # The import level is left at the interpreter default; an explicit -1
    # is only accepted by Python 2.
    module = __import__(mod_path, {}, {}, [func_name])
    callback = getattr(module, func_name)

    QueueManager.register('get_queue')

    host, port = host.split(':')

    m = QueueManager(address=(host, int(port)), authkey=key)
    m.connect()
    queue = m.get_queue()

    pool = ThreadPool(queue, size=threads)

    # TODO: how do we know if we're done?
    # NOTE(review): join() is called straight after the pool is created and
    # tells every worker to stop, and the callback receives the bound
    # ``queue.get`` method rather than a job. Both look like placeholders;
    # confirm the intended dispatch before relying on this function.
    pool.join()
    callback(queue.get)
+
+
def main():
    """Command-line entry point for ``tm-slave``.

    Expects exactly one positional argument (the job callback target) and
    forwards it, together with the parsed ``--host``, ``--key`` and
    ``--threads`` options, to ``run``. Exits with status 1 after printing
    usage otherwise.
    """
    import optparse
    import sys
    parser = optparse.OptionParser()
    parser.add_option("--host", dest="host", default='0.0.0.0:3050')
    parser.add_option("--key", dest="key", default='taskmaster')
    parser.add_option("--threads", dest="threads", default=1, type=int)
    # NOTE(review): --procs is not accepted yet; run() has no such parameter.
    # parser.add_option("--procs", dest="procs", default=1, type=int)
    (options, args) = parser.parse_args()
    if len(args) != 1:
        print('Usage: tm-slave <callback>')
        sys.exit(1)
    # The target is run()'s first argument and sys.exit() takes a single
    # status, so the call must be nested this way round.
    sys.exit(run(args[0], **options.__dict__))


if __name__ == '__main__':
    main()
View
39 src/taskmaster/master.py
@@ -1,39 +0,0 @@
-"""
-taskmaster.master
-~~~~~~~~~~~~~~~~~
-
-:copyright: (c) 2010 DISQUS.
-:license: Apache License 2.0, see LICENSE for more details.
-"""
-
-from multiprocessing.managers import BaseManager
-import Queue
-
-
-def run(size=10000, host='0.0.0.0:3050', key='taskmaster'):
- class QueueManager(BaseManager):
- pass
-
- host, port = host.split(':')
-
- queue = Queue.Queue(max_size=size)
-
- QueueManager.register('get_queue', callable=lambda: queue)
-
- m = QueueManager(address=(host, int(port)), key=key)
- s = m.get_server()
- s.serve_forever()
-
-
-def main():
- import optparse
- import sys
- parser = optparse.OptionParser()
- parser.add_option("--host", dest="host", default='0.0.0.0:3050')
- parser.add_option("--size", dest="size", default='10000', type=int)
- parser.add_option("--key", dest="key", default='taskmaster')
- (options, args) = parser.parse_args()
- sys.exit(run(**options.__dict__))
-
-if __name__ == '__main__':
- main()
View
35 src/taskmaster/slave.py
@@ -1,35 +0,0 @@
-"""
-taskmaster.slave
-~~~~~~~~~~~~~~~~
-
-:copyright: (c) 2010 DISQUS.
-:license: Apache License 2.0, see LICENSE for more details.
-"""
-
-from multiprocessing.managers import BaseManager
-
-
-def run(host='0.0.0.0:3050', key='taskmaster'):
- class QueueManager(BaseManager):
- pass
-
- QueueManager.register('get_queue')
-
- host, port = host.split(':')
-
- m = QueueManager(address=(host, int(port)), key=key)
- m.connect()
- # queue = m.get_queue()
-
-
-def main():
- import optparse
- import sys
- parser = optparse.OptionParser()
- parser.add_option("--host", dest="host", default='0.0.0.0:3050')
- parser.add_option("--key", dest="key", default='taskmaster')
- (options, args) = parser.parse_args()
- sys.exit(run(**options.__dict__))
-
-if __name__ == '__main__':
- main()
View
125 src/taskmaster/taskmaster.py
@@ -0,0 +1,125 @@
+import sys
+import time
+from cPickle import dumps, loads
+from os import path, unlink
+
+from taskmaster.workers import ThreadPool
+
+
class Taskmaster(object):
    """Run ``callback`` over the objects of a queryset, recording progress.

    Objects are partitioned between nodes by ``obj.pk % total_nodes``. A
    background thread keeps writing the id to resume from into
    ``state_file`` so that a later run can start at that id.
    """

    def __init__(self, callback, queryset, state_file=None, qs_kwargs=None, node='1/1', progress=True):
        """
        :param callback: callable invoked with each object handled by this node.
        :param queryset: object source handed to ``RangeQuerySetWrapper``.
        :param state_file: where progress is stored; defaults to a file next
                           to the module that defines ``callback``.
        :param qs_kwargs: extra keyword arguments for ``RangeQuerySetWrapper``.
        :param node: ``"<index>/<total>"`` with a 1-based index.
        :param progress: whether to drive a progress bar.
        """
        if not state_file:
            callback_file = sys.modules[callback.__module__].__file__
            state_file = path.join(
                path.dirname(callback_file),
                '%s.node%s.state' % (path.basename(callback_file), node.replace('/', '-')))

        if qs_kwargs is None:
            qs_kwargs = {}

        self.nodestr = node
        self.node, self.total_nodes = map(int, node.split('/', 1))
        # Stored 0-based so it can be compared against ``pk % total_nodes``.
        self.node -= 1

        self.callback = callback
        self.state_file = state_file
        self.queryset = queryset
        self.qs_kwargs = qs_kwargs

        self.progress = progress

    def read_state(self):
        """Return the saved state as a dict; ``{}`` when there is none.

        A missing, empty or unreadable state file yields ``{}``. A file that
        holds a bare id instead of a dict is returned as ``{'last_id': id}``.
        """
        if not path.exists(self.state_file):
            return {}

        print("Reading previous state from %r" % self.state_file)
        with open(self.state_file, 'rb') as fp:
            data = fp.read()
        if not data:
            return {}

        # NOTE(review): unpickling executes whatever the file contains; the
        # state file must only ever be written by this class.
        try:
            state = loads(data)
        except Exception as e:
            print(e)
            return {}

        if not isinstance(state, dict):
            state = {'last_id': state}
        return state

    def _resume_id(self, id_state):
        """Return the id to resume from, or ``None`` if nothing is tracked.

        That is the lowest unprocessed id, or the highest processed id when
        every tracked id has been processed.
        """
        entries = list(id_state.items())
        if not entries:
            return None
        return min(entries, key=lambda x: (x[1], -x[0] if x[1] else x[0]))[0]

    def state_writer(self, id_state):
        """Loop forever, persisting the resume id derived from ``id_state``.

        Intended to run in a daemon thread; it never returns.
        """
        def cleanup(last_id):
            # Iterate over a snapshot because entries are removed on the way.
            for id_val, done in list(id_state.items()):
                if done and id_val <= last_id:
                    id_state.pop(id_val, None)

        with open(self.state_file, 'wb') as state_fp:
            i = 0
            written_id = None
            while True:
                last_job = self._resume_id(id_state)
                if last_job is None:
                    time.sleep(0.1)
                    continue

                # Stored under 'last_id' because that is the key run() reads.
                if last_job != written_id:
                    state_fp.seek(0)
                    state_fp.write(dumps({'last_id': last_job}))
                    state_fp.flush()
                    written_id = last_job

                cleanup(last_job)

                i += 1
                if self.progress:
                    self.pbar.update(i)

    def handle(self, obj, id_state):
        """Run the callback on ``obj`` if it belongs to this node.

        The object's id is marked 0 while the callback runs and 1 afterwards,
        including when the callback raises.
        """
        if obj.pk % self.total_nodes != self.node:
            return

        id_state[obj.pk] = 0
        try:
            self.callback(obj)
        finally:
            id_state[obj.pk] = 1

    def reset(self):
        """Delete the state file, if there is one."""
        if path.exists(self.state_file):
            unlink(self.state_file)

    def get_pool(self, workers=1):
        # NOTE(review): the ThreadPool visible in taskmaster.workers takes
        # (queue, size) and has no spawn_n()/waitall(); confirm which pool
        # API is intended before running this.
        return ThreadPool(workers)

    def put_job(self, pool, func, *args):
        """Schedule ``func(*args)`` on ``pool``."""
        pool.spawn_n(func, *args)

    def run(self, workers=1):
        """Process the queryset with ``workers`` workers, resuming from the
        saved state when there is one."""
        # Imported here because the module itself does not import threading.
        from threading import Thread, current_thread

        id_state = {
            # stores a map of object ids to an int value representing if
            # they've completed yet
            # obj.pk: 1/0
        }

        queryset = self.queryset
        # Work on a copy so the caller's dict is not modified.
        qs_kwargs = dict(self.qs_kwargs)

        state = self.read_state()

        if state.get('last_id'):
            qs_kwargs['min_id'] = max(int(state['last_id']), qs_kwargs.get('min_id', 0))

        pool = self.get_pool(workers)

        print("Starting workers for thread=%r (node=%s) at min_id=%s" % (
            current_thread().ident, self.nodestr, qs_kwargs.get('min_id') or 0))

        # The progress bar is created before the writer thread starts, since
        # that thread updates it.
        if self.progress:
            # NOTE(review): Counter, Speed, Timer, ProgressBar and
            # UnknownLength are not imported by this module; presumably they
            # come from a progress bar package. Confirm and import them.
            widgets = ['Status: ', Counter(), ' | ', Speed(), ' | ', Timer()]
            self.pbar = ProgressBar(widgets=widgets, maxval=UnknownLength)
            self.pbar.start()

        state_writer = Thread(target=self.state_writer, kwargs={
            'id_state': id_state,
        })
        state_writer.daemon = True
        state_writer.start()

        # NOTE(review): RangeQuerySetWrapper is not imported by this module.
        for obj in RangeQuerySetWrapper(queryset, sorted=True, **qs_kwargs):
            self.put_job(pool, self.handle, obj, id_state)

        pool.waitall()
        # The writer never exits on its own; give it a moment to record the
        # final id and then move on.
        state_writer.join(1)

        if self.progress:
            self.pbar.finish()
View
48 src/taskmaster/workers.py
@@ -0,0 +1,48 @@
+"""
+taskmaster.workers
+~~~~~~~~~~~~~~~~~~
+
+:copyright: (c) 2010 DISQUS.
+:license: Apache License 2.0, see LICENSE for more details.
+"""
+
+import time
+from threading import Thread
+from Queue import Empty
+
+
class Worker(Thread):
    """Thread that takes ``(func, args, kwargs)`` jobs off a queue and runs
    them until its ``running`` flag is cleared."""

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue
        # Set here rather than in run(): a stop request made before the
        # thread gets scheduled must not be overwritten when run() begins.
        self.running = True

    def run(self):
        """Process jobs until ``running`` is False or a job is interrupted."""
        while self.running:
            try:
                func, args, kwargs = self.queue.get_nowait()
            except Empty:
                # Nothing queued; wait briefly and look at the flag again.
                time.sleep(0.1)
                continue

            try:
                func(*args, **kwargs)
            except KeyboardInterrupt:
                return
            finally:
                # Always acknowledge the job so queue.join() cannot hang.
                self.queue.task_done()
+
+
class ThreadPool(object):
    """Fixed-size group of ``Worker`` threads that share one queue."""

    def __init__(self, queue, size=10):
        """Create ``size`` workers reading from ``queue`` and start them."""
        self.workers = [Worker(queue) for _ in xrange(size)]

        for worker in self.workers:
            worker.start()

    def join(self):
        """Tell every worker to stop, then wait for all of them to exit."""
        # Clear every flag before waiting on any thread, so the workers wind
        # down concurrently and not one polling interval after another.
        for worker in self.workers:
            worker.running = False

        for worker in self.workers:
            worker.join()
View
0  tests/__init__.py
No changes.
View
0  tests/taskmaster/__init__.py
No changes.
View
0  tests/taskmaster/cli/__init__.py
No changes.
View
0  tests/taskmaster/cli/master/__init__.py
No changes.
View
2  tests/taskmaster/cli/master/tests.py
@@ -0,0 +1,2 @@
def an_iterator(last=0):
    """Return the integers from ``last`` up to, but excluding, 100000."""
    stop = 100000
    return xrange(last, stop)
View
0  tests/taskmaster/cli/slave/__init__.py
No changes.
View
0  tests/taskmaster/cli/slave/tests.py
No changes.
Please sign in to comment.
Something went wrong with that request. Please try again.