# General Setup for Multiprocessing Communication

In [None]:
import logging
# getLogger() without a name returns the root logger; it is shared by the
# later cells of this notebook through the module-level name `logger`.
logger = logging.getLogger()
# Lower the threshold so that INFO-level records are not filtered out.
logger.setLevel('INFO')
# Emit one test record to check the logging setup.
logger.info('Yo, Yo!')

# General Example of each Multiprocessing Function

## Common setup for running each example

In [None]:
# Source text of the helper module `mp`. Its `run` function logs its
# arguments, sleeps for `proc_id` seconds and returns what it was given.
mp_source = """
import time
import logging

def run(proc_id, *args, **kwargs):
    sleep_time = proc_id
    logging.info('Process {}: {}'.format(proc_id, args))
    #logging.info(delay)#, kwargs)
    time.sleep(sleep_time)
    return proc_id, args, kwargs
"""

# Write the module next to the notebook so that it can be imported below.
with open('mp.py', 'w') as module_file:
    module_file.write(mp_source)

In [None]:
# The function executed by the pool workers must be importable by them,
# so it lives in a separate module (mp.py, written above) instead of being
# defined directly in IPython.
import mp

# Quick check in the parent process: proc_id 0 means a zero-second sleep.
mp.run(0)

In [None]:
from multiprocessing import Pool

def callback(res):
    """Log the value this callback is invoked with, prefixed by 'Callback:'.

    Has the single-argument shape expected for the ``callback=`` parameter of
    the pool's ``*_async`` methods.

    Args:
        res: The value handed to the callback.
    """
    # logging %-formats the message with the extra arguments, so the message
    # needs a placeholder; without one the record fails to format and the
    # result is never shown.
    logger.info('Callback: %s', res)
    
def error_callback(res):
    """Log the value this callback is invoked with, prefixed by 'Error Callback:'.

    Has the single-argument shape expected for the ``error_callback=``
    parameter of the pool's ``*_async`` methods, which pass the exception
    instance raised by the target function.

    Args:
        res: The value (normally an exception instance) handed to the callback.
    """
    # logging %-formats the message with the extra arguments, so the message
    # needs a placeholder; without one the record fails to format and the
    # error is never shown.
    logger.info('Error Callback: %s', res)

def test(pool_func, **kwargs):
    """Run ``mp.run`` through one method of a fresh four-process pool.

    Args:
        pool_func: Name of the ``Pool`` method to call, e.g. ``'map'`` or
            ``'apply_async'``.
        **kwargs: Passed on as keyword arguments to that method, after the
            function itself.

    Returns:
        Whatever the selected pool method returned. The pool has already been
        closed and joined when this function returns.

    Raises:
        AttributeError: If ``pool_func`` is not an attribute of the pool.
    """
    pool = Pool(processes=4)
    try:
        f = getattr(pool, pool_func)
        logger.info('Starting MP run with: {}'.format(kwargs))
        res = f(mp.run, **kwargs)
    finally:
        # Shut the workers down even when the lookup or the call fails, so a
        # failed experiment does not leave worker processes behind.
        pool.close()
        pool.join()
    return res

In [None]:
# Ten argument tuples for mp.run: ids 1..10, where odd ids carry a small
# dict and even ids carry the string 'goodbye' as the second argument.
args = tuple(
    (job_id, {'hello': 'test'}) if job_id % 2 else (job_id, 'goodbye')
    for job_id in range(1, 11)
)

## apply(func[, args[, kwds]])

This runs a single call of the function in one worker of the pool, passing the given arguments to it, and blocks until the result is ready.

In [None]:
# tuple(range(2)) is (0, 1), so the worker executes mp.run(0, 1).
test('apply', args=tuple(range(2)))

## apply_async(func[, args[, kwds[, callback[, error_callback]]]])
A variant of the apply() method which returns a result object.

If callback is specified then it should be a callable which accepts a single argument. When the result becomes ready callback is applied to it, that is unless the call failed, in which case the error_callback is applied instead.

If error_callback is specified then it should be a callable which accepts a single argument. If the target function fails, then the error_callback is called with the exception instance.

Callbacks should complete immediately since otherwise the thread which handles the results will get blocked.

In [None]:
# Same call as the apply() example, but a result object is returned
# instead of the value itself.
res = test('apply_async', args=tuple(range(2)))
# test() closes and joins the pool before returning, so the result is
# already available when get() is called here.
res.get(-1)

## map(func, iterable[, chunksize])
A parallel equivalent of the map() built-in function (it supports only one iterable argument though). It blocks until the result is ready.

This method chops the iterable into a number of chunks which it submits to the process pool as separate tasks. The (approximate) size of these chunks can be specified by setting chunksize to a positive integer.

In [None]:
# One call per element of the iterable: mp.run(0) and mp.run(1).
test('map', iterable=tuple(range(2)))

## map_async(func, iterable[, chunksize[, callback[, error_callback]]])
A variant of the map() method which returns a result object.

If callback is specified then it should be a callable which accepts a single argument. When the result becomes ready callback is applied to it, that is unless the call failed, in which case the error_callback is applied instead.

If error_callback is specified then it should be a callable which accepts a single argument. If the target function fails, then the error_callback is called with the exception instance.

Callbacks should complete immediately since otherwise the thread which handles the results will get blocked.

In [None]:
# Same work as the map() example, but a result object is returned.
res = test('map_async', iterable=tuple(range(2)))
# test() closes and joins the pool before returning, so the result is
# already available when get() is called here.
res.get(-1)

## imap(func, iterable[, chunksize])
A lazier version of map().

The chunksize argument is the same as the one used by the map() method. For very long iterables using a large value for chunksize can make the job complete much faster than using the default value of 1.

Also if chunksize is 1 then the next() method of the iterator returned by the imap() method has an optional timeout parameter: next(timeout) will raise multiprocessing.TimeoutError if the result cannot be returned within timeout seconds.

In [None]:
# imap hands back an iterator over the results of mp.run(0) and mp.run(1).
res = test('imap', iterable=tuple(range(2)))
# The pool was already closed and joined inside test(); log each result
# as the iterator yields it.
for r in res:
    logger.info('Return value: {}'.format(r))

## imap_unordered(func, iterable[, chunksize])
The same as imap() except that the ordering of the results from the returned iterator should be considered arbitrary. (Only when there is only one worker process is the order guaranteed to be “correct”.)

In [None]:
# Same as the imap() example, except that the order of the yielded
# results should be treated as arbitrary.
res = test('imap_unordered', iterable=tuple(range(2)))
for r in res:
    logger.info('Return value: {}'.format(r))

## starmap(func, iterable[, chunksize])
Like map() except that the elements of the iterable are expected to be iterables that are unpacked as arguments.

Hence an iterable of [(1,2), (3, 4)] results in [func(1,2), func(3,4)].

New in version 3.3.

In [None]:
# Each inner tuple is unpacked into positional arguments, i.e.
# mp.run(1, {'hello': 'test'}) and mp.run(2, 'goodbye').
test('starmap', iterable=((1,{'hello':'test'}), (2, 'goodbye')))

## starmap_async(func, iterable[, chunksize[, callback[, error_callback]]])
A combination of starmap() and map_async() that iterates over iterable of iterables and calls func with the iterables unpacked. Returns a result object.

New in version 3.3.

In [None]:
def starmap_callback(res):
    """Print the value passed to the callback, prefixed with 'Callback:'."""
    print('Callback:', res)

# Feed every tuple in `args` to mp.run via starmap_async, four tuples per
# chunk, with starmap_callback registered as the success callback.
res = test('starmap_async', iterable=args, callback=starmap_callback, chunksize=4)
# NOTE(review): test() joins the pool before returning, so this presumably
# times only the retrieval of an already finished result - confirm intent.
%timeit res.get(-1)

# Effect of Chunksize

In [None]:
# Raise the logger threshold to WARN (presumably to keep INFO messages out
# of the timing output).
logger.setLevel('WARN')
# A single worker process is used for both measurements.
pool = Pool(processes=1)
# Same workload twice: default chunking versus chunksize=4.
%timeit pool.starmap(mp.run, iterable=args)
%timeit pool.starmap(mp.run, iterable=args, chunksize=4)
# No more work will be submitted; wait for the worker to exit.
pool.close()
pool.join()

# Using Queues

In [None]:
from multiprocessing import Pool, Manager

# The Manager class is needed to use a Queue with the multiprocessing.Pool
m = Manager()
# Queue created through the manager; the same object is placed in every
# job tuple below.
q = m.Queue()
# One tuple per job, in the parameter order of queue_test.run:
# (job_id, reps, q, message).
args = (
    (1, 2, q, "Twice#1"),
    (2, 3, q, "Thrice"),
    (3, 1, q, "Once"),
    (4, 5, q, "Penta"),
    (5, 2, q, "Twice#2"),
)

In [None]:
# As with mp.py, the worker function has to be importable by the child
# processes, so it is written to its own module instead of being defined
# in the notebook.

# Source text of the helper module `queue_test`. Its `run` function sleeps
# two seconds, puts one list of `reps` formatted messages on the queue and
# logs that the job completed.
queue_test_source = """
import logging
logger = logging.getLogger()
logger.setLevel('INFO')
import time

def run(job_id, reps, q, message):
    time.sleep(2)
    q.put([
        'Job({}): {} #{} of {}'.format(job_id, message, i+1, reps) for i in range(reps)
    ])
    logger.info('Job {} completed.'.format(job_id))
"""

with open('queue_test.py', 'w') as module_file:
    module_file.write(queue_test_source)

import queue_test

In [None]:
import queue
from IPython.display import Markdown, display

def the_callback(res):
    """Log every entry of the result passed to the callback.

    When registered with starmap_async, ``res`` is the complete list of
    return values, delivered in one call once the whole result is ready.
    """
    for entry in res:
        message = '   Callback called with result: {}'.format(entry)
        logger.info(message)
    
# Show the job list as a Markdown bullet list, one bullet per argument tuple.
display(Markdown('* {}'.format('\n* '.join(str(a) for a in args))))

# Create the pool and assign the work function and arguments.
logger.info('Starting child processes:')
pool = Pool(processes=2)
try:
    res = pool.starmap_async(queue_test.run, iterable=args, callback=the_callback)

    # In the parent/main process, handle the data coming from each of the child workers.
    logger.info('Start checking queue:')
    # Keep polling until all jobs have finished AND the queue has been drained.
    while not res.ready() or not q.empty():
        # Catching queue.Empty is needed because the last message can be processed before
        # the result indicates completion.
        try:
            value = q.get(block=True, timeout=1)
        except queue.Empty:
            continue
        logger.info('   Queue message: {}'.format(value))
        q.task_done()

    # The loop above only exits once res.ready() is true, so the result is available here.
    results = res.get(-1)
    for r in results:
        logger.info(r)
finally:
    # Shut the workers down even if the polling loop is interrupted or raises,
    # so no worker processes are left behind.
    pool.close()
    pool.join()
logger.info('Done')

In [None]:
# Repeats the pool shutdown from the end of the previous cell.
# NOTE(review): presumably kept as a manual cleanup step for when the
# previous cell is interrupted before reaching its own close()/join().
pool.close()
pool.join()

# Queues

In [None]:
from multiprocessing import Process, Queue

def f(q):
    """Put one fixed sample list on the queue ``q``.

    Args:
        q: Any object with a ``put`` method; the demo below passes a
            ``multiprocessing.Queue``.
    """
    q.put([42, None, 'hello'])

if __name__ == '__main__':
    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    # print() must be called as a function: the statement form is Python 2
    # only and a SyntaxError on Python 3, which the rest of this notebook
    # needs (e.g. Pool.starmap).
    print(q.get())    # prints "[42, None, 'hello']"
    # Read from the queue first, then wait for the child process to exit.
    p.join()