# Section 6: Parallel Processing with `concurrent.futures`

In [7]:
import collections
import time
import multiprocessing
import os



# Immutable record type describing a single scientist.
Scientist = collections.namedtuple('Scientist', 'name field born nobel')

# Read-only sample data set used by every example below.
# NOTE(review): several `field` values look inconsistent with the historical
# record (e.g. Marie Curie listed under 'math') — confirm before relying on them.
scientists = tuple(
    Scientist._make(record)
    for record in (
        # (name, field, born, nobel)
        ('Ada Lovelace', 'math', 1815, False),
        ('Emmy Noether', 'math', 1882, False),
        ('Marie Curie', 'math', 1867, True),
        ('Tu Youyou', 'physics', 1930, True),
        ('Ada Yonath', 'chemistry', 1939, True),
        ('Vera Rubin', 'chemistry', 1928, False),
        ('Sally Ride', 'physics', 1951, False),
    )
)

In [9]:
def transform(x, reference_year: int = 2017) -> dict:
    """Turn one scientist record into a ``{'name', 'age'}`` dict.

    The one-second sleep stands in for slow work so that the benefit of
    running several calls in parallel shows up in the timings.

    Args:
        x: Record exposing ``name`` and ``born`` (birth year) attributes.
        reference_year: Year the age is measured against. Defaults to 2017,
            the value that used to be hard-coded in the expression.

    Returns:
        dict: ``{'name': x.name, 'age': reference_year - x.born}``.
    """
    pid = os.getpid()  # shows which process handled this record
    print(f'\nProcess {pid} working record {x.name}')
    time.sleep(1)
    # Keep a space before the name so the log reads "record Ada Lovelace".
    print(f'\nProcess {pid} done processing record {x.name}')
    return {'name': x.name, 'age': reference_year - x.born}


# Time a parallel map over every record using a pool of worker processes.
# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

# pool.map blocks until every result is back, so it is safe for the context
# manager to tear the pool down on exit; without it each re-run of this cell
# would leave another set of idle worker processes behind.
with multiprocessing.Pool() as pool:
    result = pool.map(transform, scientists)

end = time.perf_counter()

print(f'\nTime to complete:{end - start:.2f}')


Process 4659 working record Ada Yonath
Process 4661 working record Sally Ride
Process 4655 working record Ada Lovelace
Process 4660 working record Vera Rubin
Process 4658 working record Tu Youyou
Process 4657 working record Marie Curie
Process 4656 working record Emmy Noether







Process 4659 done processing recordAda Yonath

Process 4660 done processing recordVera Rubin
Process 4657 done processing recordMarie Curie
Process 4655 done processing recordAda Lovelace
Process 4658 done processing recordTu Youyou
Process 4661 done processing recordSally Ride
Process 4656 done processing recordEmmy Noether





Time to complete:1.11




# The concurrent.futures Module


In [10]:
import concurrent.futures

In [11]:
def transform(x, reference_year: int = 2017) -> dict:
    """Turn one scientist record into a ``{'name', 'age'}`` dict.

    The one-second sleep stands in for slow work so that the benefit of
    running several calls in parallel shows up in the timings.

    Args:
        x: Record exposing ``name`` and ``born`` (birth year) attributes.
        reference_year: Year the age is measured against. Defaults to 2017,
            the value that used to be hard-coded in the expression.

    Returns:
        dict: ``{'name': x.name, 'age': reference_year - x.born}``.
    """
    pid = os.getpid()  # shows which process handled this record
    print(f'\nProcess {pid} working record {x.name}')
    time.sleep(1)
    # Keep a space before the name so the log reads "record Ada Lovelace".
    print(f'\nProcess {pid} done processing record {x.name}')
    return {'name': x.name, 'age': reference_year - x.born}


# Time the same workload using a pool of worker processes from concurrent.futures.
# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

with concurrent.futures.ProcessPoolExecutor() as executor:
    # Executor.map returns a one-shot lazy iterator. Materialize it here so
    # `result` can be read any number of times by later cells and so that an
    # exception raised in a worker surfaces in this cell.
    result = tuple(executor.map(transform, scientists))

end = time.perf_counter()

print(f'\nTime to complete:{end - start:.2f}')


Process 4730 working record Ada Lovelace
Process 4731 working record Emmy Noether
Process 4733 working record Tu Youyou
Process 4732 working record Marie Curie
Process 4734 working record Ada Yonath
Process 4735 working record Vera Rubin
Process 4736 working record Sally Ride







Process 4731 done processing recordEmmy Noether
Process 4732 done processing recordMarie Curie
Process 4730 done processing recordAda Lovelace
Process 4735 done processing recordVera Rubin
Process 4734 done processing recordAda Yonath
Process 4736 done processing recordSally Ride
Process 4733 done processing recordTu Youyou







Time to complete:1.15


In [12]:
# Display the transformed records as a tuple.
tuple(result)

({'name': 'Ada Lovelace', 'age': 202},
 {'name': 'Emmy Noether', 'age': 135},
 {'name': 'Marie Curie', 'age': 150},
 {'name': 'Tu Youyou', 'age': 87},
 {'name': 'Ada Yonath', 'age': 78},
 {'name': 'Vera Rubin', 'age': 89},
 {'name': 'Sally Ride', 'age': 66})

# How Functional Programming Makes Parallel Processing Simple

# concurrent.futures vs multiprocessing
- `concurrent.futures` offers multiple executor options, including thread-based ones
- `concurrent.futures.ThreadPoolExecutor()`

In [14]:
def transform(x, reference_year: int = 2017) -> dict:
    """Turn one scientist record into a ``{'name', 'age'}`` dict.

    The one-second sleep stands in for slow work so that the benefit of
    running several calls concurrently shows up in the timings.

    Args:
        x: Record exposing ``name`` and ``born`` (birth year) attributes.
        reference_year: Year the age is measured against. Defaults to 2017,
            the value that used to be hard-coded in the expression.

    Returns:
        dict: ``{'name': x.name, 'age': reference_year - x.born}``.
    """
    # Threads of one pool live in the same process, so every call run by a
    # thread pool reports the same process id.
    pid = os.getpid()
    print(f'\nProcess {pid} working record {x.name}')
    time.sleep(1)
    # Keep a space before the name so the log reads "record Ada Lovelace".
    print(f'\nProcess {pid} done processing record {x.name}')
    return {'name': x.name, 'age': reference_year - x.born}


# Time the same workload using a pool of threads instead of processes.
# perf_counter() is a monotonic clock intended for measuring intervals.
start = time.perf_counter()

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Executor.map returns a one-shot lazy iterator. Materialize it here so
    # `result` holds the actual records and so that an exception raised in a
    # worker thread surfaces in this cell instead of going unnoticed.
    result = tuple(executor.map(transform, scientists))

end = time.perf_counter()

print(f'\nTime to complete:{end - start:.2f}')


Process 4578 working record Ada Lovelace

Process 4578 working record Emmy Noether

Process 4578 working record Marie Curie

Process 4578 working record Tu Youyou
Process 4578 working record Ada Yonath


Process 4578 working record Vera Rubin

Process 4578 working record Sally Ride

Process 4578 done processing recordMarie Curie
Process 4578 done processing recordEmmy Noether

Process 4578 done processing recordAda Lovelace


Process 4578 done processing recordTu Youyou
Process 4578 done processing recordAda Yonath


Process 4578 done processing recordVera Rubin

Process 4578 done processing recordSally Ride

Time to complete:1.00


# When to Use concurrent.futures or multiprocessing
- The global interpreter lock (GIL) problem
- In Python, no two threads can execute Python code at the same time.
- `concurrent.futures.ProcessPoolExecutor()` gets around it by running the work in separate processes