# Section 5: Parallel Processing With Multiprocessing

In [1]:
import collections


# Immutable record type describing a single scientist.
Scientist = collections.namedtuple('Scientist', [
    'name',   # full name
    'field',  # primary field of work
    'born',   # year of birth
    'nobel',  # True if the scientist received a Nobel Prize
])

# Sample data set for the examples in this notebook. It is a tuple of
# namedtuples, so neither the collection nor its records can be mutated.
scientists = (
    Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
    Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
    Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),
    Scientist(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
    Scientist(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
    Scientist(name='Vera Rubin', field='astronomy', born=1928, nobel=False),
    Scientist(name='Sally Ride', field='physics', born=1951, nobel=False),
)

In [39]:
import time
from pprint import pprint
def transform(x, reference_year=2017, delay=1):
    """Simulate a slow transformation of one scientist record.

    Args:
        x: Record exposing ``name`` and ``born`` attributes.
        reference_year: Year the age is computed against. Defaults to 2017,
            the value that used to be hard-coded.
        delay: Seconds to sleep to simulate expensive work. Defaults to 1.

    Returns:
        dict with the record's ``name`` and its ``age``
        (``reference_year - born``).
    """
    print(f'\nProcessing record {x.name}')
    time.sleep(delay)  # stand-in for an expensive computation
    # NOTE(review): there is no space after "record" in this message; left
    # unchanged so the text keeps matching the recorded cell output.
    print(f'\nDone processing record{x.name}')
    return {'name': x.name, 'age': reference_year - x.born}

In [40]:
# Sequential baseline: map() feeds the records to transform one at a time,
# and tuple() forces the lazy iterator to run to completion.
result = tuple(map(transform, scientists))


Processing record Ada Lovelace

Done processing recordAda Lovelace

Processing record Emmy Noether

Done processing recordEmmy Noether

Processing record Marie Curie

Done processing recordMarie Curie

Processing record Tu Youyou

Done processing recordTu Youyou

Processing record Ada Yonath

Done processing recordAda Yonath

Processing record Vera Rubin

Done processing recordVera Rubin

Processing record Sally Ride

Done processing recordSally Ride


In [41]:
# Show the transformed records as the cell's output.
result

({'name': 'Ada Lovelace', 'age': 202},
 {'name': 'Emmy Noether', 'age': 135},
 {'name': 'Marie Curie', 'age': 150},
 {'name': 'Tu Youyou', 'age': 87},
 {'name': 'Ada Yonath', 'age': 78},
 {'name': 'Vera Rubin', 'age': 89},
 {'name': 'Sally Ride', 'age': 66})

# The multiprocessing Module


In [42]:
import multiprocessing

# Measuring Execution Time in the multiprocessing Testbed

In [43]:
# Time the sequential baseline. perf_counter() is monotonic and
# high-resolution, so the measurement cannot be skewed by system clock
# adjustments the way time.time() can.
start = time.perf_counter()
result = tuple(map(transform, scientists))
end = time.perf_counter()

print(f'\nTime to complete:{end - start:.2f}')


Processing record Ada Lovelace

Done processing recordAda Lovelace

Processing record Emmy Noether

Done processing recordEmmy Noether

Processing record Marie Curie

Done processing recordMarie Curie

Processing record Tu Youyou

Done processing recordTu Youyou

Processing record Ada Yonath

Done processing recordAda Yonath

Processing record Vera Rubin

Done processing recordVera Rubin

Processing record Sally Ride

Done processing recordSally Ride

Time to complete:7.00


# How to Create a multiprocessing.Pool() Object

In [44]:
# Instantiating a Pool starts worker processes. Use it as a context manager so
# the workers created just to look up the documentation are shut down again.
with multiprocessing.Pool() as pool:
    help(pool)

Help on Pool in module multiprocessing.pool object:

class Pool(builtins.object)
 |  Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None, context=None)
 |  
 |  Class which supports an async version of applying functions to arguments.
 |  
 |  Methods defined here:
 |  
 |  __del__(self, _warn=<built-in function warn>, RUN='RUN')
 |      # Copy globals as function locals to make sure that they are available
 |      # during Python shutdown when the Pool is destroyed.
 |  
 |  __enter__(self)
 |  
 |  __exit__(self, exc_type, exc_val, exc_tb)
 |  
 |  __init__(self, processes=None, initializer=None, initargs=(), maxtasksperchild=None, context=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __reduce__(self)
 |      Helper for pickle.
 |  
 |  __repr__(self)
 |      Return repr(self).
 |  
 |  apply(self, func, args=(), kwds={})
 |      Equivalent of `func(*args, **kwds)`.
 |      Pool must be running.
 |  
 |  apply_async(self, 

In [46]:
start = time.perf_counter()

# The context manager terminates the worker processes when the block exits.
# pool.map() blocks until every result is available, so nothing is lost.
with multiprocessing.Pool() as pool:
    result = pool.map(transform, scientists)
    end = time.perf_counter()  # stop the clock before pool teardown

print(f'\nTime to complete:{end - start:.2f}')


Processing record Ada Lovelace
Processing record Emmy Noether
Processing record Tu Youyou
Processing record Marie Curie
Processing record Ada Yonath
Processing record Vera Rubin
Processing record Sally Ride







Done processing recordAda Lovelace
Done processing recordMarie Curie
Done processing recordAda Yonath

Done processing recordTu Youyou
Done processing recordEmmy Noether
Done processing recordSally Ride
Done processing recordVera Rubin





Time to complete:1.16



# How to Use multiprocessing.Pool()


In [48]:
# add logging
import os


def transform(x, reference_year=2017, delay=1):
    """Simulate a slow transformation of one record, logging the worker PID.

    Args:
        x: Record exposing ``name`` and ``born`` attributes.
        reference_year: Year the age is computed against. Defaults to 2017,
            the value that used to be hard-coded.
        delay: Seconds to sleep to simulate expensive work. Defaults to 1.

    Returns:
        dict with the record's ``name`` and its ``age``
        (``reference_year - born``).
    """
    pid = os.getpid()  # identifies which process handles this record
    print(f'\nProcess {pid} working record {x.name}')
    time.sleep(delay)  # stand-in for an expensive computation
    # NOTE(review): there is no space after "record" in this message; left
    # unchanged so the text keeps matching the recorded cell output.
    print(f'\nProcess {pid} done processing record{x.name}')
    return {'name': x.name, 'age': reference_year - x.born}


start = time.perf_counter()

# Pool() with no arguments chooses the number of workers itself. The context
# manager terminates the workers when the block exits; pool.map() has
# already returned every result by then.
with multiprocessing.Pool() as pool:
    result = pool.map(transform, scientists)
    end = time.perf_counter()  # stop the clock before pool teardown

print(f'\nTime to complete:{end - start:.2f}')


Process 4377 working record Ada Lovelace
Process 4379 working record Marie Curie
Process 4378 working record Emmy Noether
Process 4381 working record Ada Yonath
Process 4380 working record Tu Youyou
Process 4382 working record Vera Rubin
Process 4383 working record Sally Ride







pProcess 4377 done processing recordAda Lovelace
pProcess 4381 done processing recordAda Yonath
pProcess 4378 done processing recordEmmy Noether

pProcess 4380 done processing recordTu Youyou

pProcess 4383 done processing recordSally Ride
pProcess 4379 done processing recordMarie Curie
pProcess 4382 done processing recordVera Rubin




Time to complete:1.15



In [None]:
### Limit the number of worker processes

In [49]:
# add logging
import os


def transform(x, reference_year=2017, delay=1):
    """Simulate a slow transformation of one record, logging the worker PID.

    Args:
        x: Record exposing ``name`` and ``born`` attributes.
        reference_year: Year the age is computed against. Defaults to 2017,
            the value that used to be hard-coded.
        delay: Seconds to sleep to simulate expensive work. Defaults to 1.

    Returns:
        dict with the record's ``name`` and its ``age``
        (``reference_year - born``).
    """
    pid = os.getpid()  # identifies which process handles this record
    print(f'\nProcess {pid} working record {x.name}')
    time.sleep(delay)  # stand-in for an expensive computation
    # NOTE(review): there is no space after "record" in this message; left
    # unchanged so the text keeps matching the recorded cell output.
    print(f'\nProcess {pid} done processing record{x.name}')
    return {'name': x.name, 'age': reference_year - x.born}


start = time.perf_counter()

# A single worker process handles the records one after another, so this
# run shows the cost of the pool without any parallelism. The context
# manager terminates the worker when the block exits.
with multiprocessing.Pool(processes=1) as pool:
    result = pool.map(transform, scientists)
    end = time.perf_counter()  # stop the clock before pool teardown

print(f'\nTime to complete:{end - start:.2f}')


Process 4452 working record Ada Lovelace

pProcess 4452 done processing recordAda Lovelace

Process 4452 working record Emmy Noether

pProcess 4452 done processing recordEmmy Noether

Process 4452 working record Marie Curie

pProcess 4452 done processing recordMarie Curie

Process 4452 working record Tu Youyou

pProcess 4452 done processing recordTu Youyou

Process 4452 working record Ada Yonath

pProcess 4452 done processing recordAda Yonath

Process 4452 working record Vera Rubin

pProcess 4452 done processing recordVera Rubin

Process 4452 working record Sally Ride

pProcess 4452 done processing recordSally Ride

Time to complete:7.07


## Use as many processes as there are records

In [50]:
# add logging
import os


def transform(x, reference_year=2017, delay=1):
    """Simulate a slow transformation of one record, logging the worker PID.

    Args:
        x: Record exposing ``name`` and ``born`` attributes.
        reference_year: Year the age is computed against. Defaults to 2017,
            the value that used to be hard-coded.
        delay: Seconds to sleep to simulate expensive work. Defaults to 1.

    Returns:
        dict with the record's ``name`` and its ``age``
        (``reference_year - born``).
    """
    pid = os.getpid()  # identifies which process handles this record
    print(f'\nProcess {pid} working record {x.name}')
    time.sleep(delay)  # stand-in for an expensive computation
    # NOTE(review): there is no space after "record" in this message; left
    # unchanged so the text keeps matching the recorded cell output.
    print(f'\nProcess {pid} done processing record{x.name}')
    return {'name': x.name, 'age': reference_year - x.born}


start = time.perf_counter()

# One worker process per record, so every record can be processed at the
# same time. The context manager terminates the workers when the block
# exits; pool.map() has already returned every result by then.
with multiprocessing.Pool(processes=len(scientists)) as pool:
    result = pool.map(transform, scientists)
    end = time.perf_counter()  # stop the clock before pool teardown

print(f'\nTime to complete:{end - start:.2f}')


Process 4512 working record Ada Lovelace
Process 4514 working record Marie Curie
Process 4513 working record Emmy Noether
Process 4515 working record Tu Youyou
Process 4517 working record Vera Rubin
Process 4516 working record Ada Yonath
Process 4518 working record Sally Ride







Process 4513 done processing recordEmmy Noether
Process 4512 done processing recordAda Lovelace
Process 4516 done processing recordAda Yonath
Process 4517 done processing recordVera Rubin
Process 4515 done processing recordTu Youyou
Process 4514 done processing recordMarie Curie
Process 4518 done processing recordSally Ride






Time to complete:1.09



# Parallel Processing With multiprocessing: Conclusion