# Multiprocessing In Python

Video Tutorial

https://www.youtube.com/watch?v=fKl2JW_qrso

## 1 Basic Multiprocessing Using `start()` and `join()`

In [11]:
import multiprocessing
import psutil
import time

In [111]:
def sleep(start):
    time.sleep(1)
    end = time.time()
    interval = end - start
    print('Slept for {} s'.format(interval))

In [112]:
sleep(time.time())

Slept for 1.0010337829589844 s


In [113]:
s0 = time.time()
sleep(s0)
sleep(s0)
print(log)

Slept for 1.0011968612670898 s
Slept for 2.0024800300598145 s
[]


In [114]:
from functools import partial 

# basic usage, pass the function that need to be parallelized into Process
# if param is needed, use partial()
s0 = time.time()
p1 = multiprocessing.Process(target=partial(sleep, start=s0))
p2 = multiprocessing.Process(target=partial(sleep, start=s0))

p1.start()
p2.start()


Slept for 1.0053908824920654 s
Slept for 1.008967638015747 s


In [115]:
# or just use args = []
s0 = time.time()
p1 = multiprocessing.Process(target=sleep, args=[s0])
p2 = multiprocessing.Process(target=sleep, args=[s0])

p1.start()
p2.start()

Slept for 1.005708932876587 s
Slept for 1.0098764896392822 s


In [116]:
s0 = time.time()
for _ in range(ncpu):
    sleep(s0)

Slept for 1.0011467933654785 s
Slept for 2.002225875854492 s
Slept for 3.0034024715423584 s
Slept for 4.0046226978302 s
Slept for 5.005927324295044 s
Slept for 6.006337404251099 s
Slept for 7.007627010345459 s
Slept for 8.008283138275146 s


In [117]:
ncpu = psutil.cpu_count()
s0 = time.time()
for _ in range(ncpu*2):
    p = multiprocessing.Process(target=partial(sleep, start=s0))
    p.start()
    
    
# this some time got executed before the multiprocess 
print('aha')  


aha
Slept for 1.0068423748016357 s
Slept for 1.0095231533050537 s
Slept for 1.0123026371002197 s
Slept for 1.0165667533874512 s
Slept for 1.0205693244934082 s
Slept for 1.0235199928283691 s
Slept for 1.0260255336761475 s
Slept for 1.0286574363708496 s
Slept for 1.0314502716064453 s
Slept for 1.0338845252990723 s
Slept for 1.0365734100341797 s
Slept for 1.040107250213623 s
Slept for 1.043205738067627 s
Slept for 1.0462050437927246 s
Slept for 1.0510985851287842 s
Slept for 1.0485646724700928 s


In [118]:
ncpu = psutil.cpu_count()
s0 = time.time()
for _ in range(ncpu):
    p = multiprocessing.Process(target=partial(sleep, start=s0))
    # use p.join() which means finish everything before this line
    p.start()
    # but if put it in the loop, it will wait until the current process finish
    # which make no different than the single process 
    p.join()  
    

Slept for 1.007488489151001 s
Slept for 2.015533685684204 s
Slept for 3.0250966548919678 s
Slept for 4.036656856536865 s
Slept for 5.049725532531738 s
Slept for 6.0611090660095215 s
Slept for 7.074568271636963 s
Slept for 8.08715534210205 s


In [120]:
ncpu = psutil.cpu_count()
s0 = time.time()
processes = []
for _ in range(ncpu*2):
    p = multiprocessing.Process(target=partial(sleep, start=s0))
    # use p.join() which means finish everything before this line
    p.start()
    # but if put it in the loop, it will wait until the STARTED process finish
    # which make no different than the single process 
    processes.append(p) 

# all the processes are STARTED so if we call join here
# it will execute all processes and then move down to print() function
for p in processes:
    p.join()


Slept for 1.0054244995117188 s
Slept for 1.0072746276855469 s
Slept for 1.0112919807434082 s
Slept for 1.0151615142822266 s
Slept for 1.0178275108337402 s
Slept for 1.0203397274017334 s
Slept for 1.023212194442749 s
Slept for 1.025681734085083 s
Slept for 1.0304615497589111 s
Slept for 1.0377569198608398 s
Slept for 1.0390913486480713 s
Slept for 1.0426864624023438 s
Slept for 1.0470097064971924 s
Slept for 1.0491464138031006 s
Slept for 1.0577900409698486 s
Slept for 1.059152603149414 s


## 2 Multiprocessing Using Context Manager `ProcessPoolExecutor` 

### 2.1 `ProcessPoolExecutor.submit(func, *args)`

Return a `Future` object instance that encapsulate the execution of the funciton

In [121]:
import concurrent.futures

In [126]:
def do_something(start):
    time.sleep(1)
    end = time.time()
    interval = end - start
    log = 'Slept for {} s'.format(interval)
    return log

In [127]:
# do the multiprocessing within a context manager
s0 = time.time()
with concurrent.futures.ProcessPoolExecutor() as executor:
    # submit() schedule a function for the executor
    # and return a Future object instance
    # Future object encapsulate the execution of the function 
    # and allow us to check on it after it been scheduled 
    for _ in range(10):
        f = executor.submit(do_something, s0)  # just pass the args or use the partial tool
        # use .result() to grab the returned value
        # it will wait until the scheduled process been executed!
        print(f.result())  

Slept for 1.028583288192749 s
Slept for 2.0316481590270996 s
Slept for 3.0348689556121826 s
Slept for 4.037087917327881 s
Slept for 5.039594411849976 s
Slept for 6.041833400726318 s
Slept for 7.043625116348267 s
Slept for 8.045630931854248 s
Slept for 9.047599077224731 s
Slept for 10.049738883972168 s


In [130]:
# Do the multiprocessing within a context manager
# Note that here the result is different from section 1
# Because by default, num_worker=num_threads = 8
s0 = time.time()
with concurrent.futures.ProcessPoolExecutor() as executor:
    processes = []
    for _ in range(ncpu*2):
        f = executor.submit(do_something, s0)
        processes.append(f)
    print("\n".join([p.result() for p in processes]))

Slept for 1.0279479026794434 s
Slept for 1.0279583930969238 s
Slept for 1.028503179550171 s
Slept for 1.0289034843444824 s
Slept for 1.0285060405731201 s
Slept for 1.0284929275512695 s
Slept for 1.028576374053955 s
Slept for 1.0290517807006836 s
Slept for 2.029646635055542 s
Slept for 2.029707908630371 s
Slept for 2.0300400257110596 s
Slept for 2.030409812927246 s
Slept for 2.030057668685913 s
Slept for 2.030407190322876 s
Slept for 2.031118154525757 s
Slept for 2.0311598777770996 s


In [131]:
# Do the multiprocessing within a context manager
# Note that here the result is different from section 1
# Because by default, max_workers=num_threads = 8
s0 = time.time()
with concurrent.futures.ProcessPoolExecutor() as executor:
    processes = []
    for _ in range(ncpu*2):
        f = executor.submit(do_something, s0)
        processes.append(f)
    print("\n".join([p.result() for p in processes]))

Slept for 1.0300021171569824 s
Slept for 1.0298991203308105 s
Slept for 1.0295798778533936 s
Slept for 1.030571699142456 s
Slept for 1.0306386947631836 s
Slept for 1.0298097133636475 s
Slept for 1.0316236019134521 s
Slept for 1.0316674709320068 s
Slept for 2.0313782691955566 s
Slept for 2.0315282344818115 s
Slept for 2.0316131114959717 s
Slept for 2.0317811965942383 s
Slept for 2.0313782691955566 s
Slept for 2.0323872566223145 s
Slept for 2.032423257827759 s
Slept for 2.0324223041534424 s


In [133]:
# now set the max worker = 16, this is larger than the thread we have
# but it still work, I guess this is because that once call on sleep
# the thread will be available again?
s0 = time.time()
with concurrent.futures.ProcessPoolExecutor(max_workers=16) as executor:
    processes = []
    for _ in range(ncpu*2):
        f = executor.submit(do_something, s0)
        processes.append(f)
    print("\n".join([p.result() for p in processes]))

Slept for 1.057748556137085 s
Slept for 1.0578391551971436 s
Slept for 1.0579161643981934 s
Slept for 1.058048963546753 s
Slept for 1.0579161643981934 s
Slept for 1.0583961009979248 s
Slept for 1.057793140411377 s
Slept for 1.0583992004394531 s
Slept for 1.0586652755737305 s
Slept for 1.058048963546753 s
Slept for 1.0583984851837158 s
Slept for 1.058934211730957 s
Slept for 1.058666706085205 s
Slept for 1.0591113567352295 s
Slept for 1.0591745376586914 s
Slept for 1.059211254119873 s


### 2.2 ProcessPoolExecutor.map(func, list)

- `.map(func, list)` will map the function to each of the items in the list and schedule them into multiprocess.
- `.map(func, list)` will return a list of returned value from each scheduled function instead of a Future object istance.

In [138]:
def do_something(start, t):
    time.sleep(t)
    end = time.time()
    log = 'Slept for {}s'.format(end-start)
    return log

print(do_something(time.time(), 2))

Slept for 2.002082109451294s


In [143]:
with concurrent.futures.ProcessPoolExecutor() as executor:
    s0 = time.time()
    arg_list = [(s0, t) for t in range(1, 17)]
    results = executor.map(do_something, arg_list)
    print([result for result in results])

TypeError: do_something() missing 1 required positional argument: 't'