## [multiprocessing — Process-based parallelism](https://docs.python.org/dev/library/multiprocessing.html)

In [1]:
import os
import time
from multiprocessing import Pool
def f(x):
    # time.sleep(0.5)
    # print(os.getpid())
    return x*x

with Pool(4) as p:
    print(p.map(f, [1,2,3,4,5,6,7,8]))
    

[1, 4, 9, 16, 25, 36, 49, 64]


In [3]:
from multiprocessing import Pool

def f(x):
    return x*x
    
pool = Pool(processes=4)
for i in pool.imap_unordered(f, range(10)):
    print(i, end=' ')

0 4 1 16 25 9 49 64 81 36 

### The Process class

In [4]:
from multiprocessing import Process
def f(name):
    print('hello', name)

p = Process(target=f, args=('bob',))
p.start()
p.join()

hello bob


In [5]:
from multiprocessing import Process
import os

def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())

def f(name):
    info('function f')
    print('hello', name)

if __name__ == '__main__':
    info('main line')
    p = Process(target=f, args=('bob',))
    p.start()
    p.join()

main line
module name: __main__
parent process: 473180
process id: 764687
function f
module name: __main__
parent process: 764687
process id: 764891
hello bob


## [An introduction to parallel programming using Python's multiprocessing module](https://sebastianraschka.com/Articles/2014_multiprocessing.html)

- The Pool class

In [6]:
import multiprocessing as mp
def cube(x):
    return x**3

In [7]:
pool = mp.Pool(processes=4)
results = pool.map(cube, range(1,7))
print(results)

[1, 8, 27, 64, 125, 216]


In [8]:
pool = mp.Pool(processes=4)
results = [pool.apply(cube, args=(x,)) for x in range(1,7)]
print(results)

[1, 8, 27, 64, 125, 216]


In [9]:
from multiprocessing import Pool
from time import sleep 
import numpy as np 

def sumj(i, arr): 
    # print(i, os.getpid())
    s = np.sum(arr)

    sleep(0.5)
    return [i, s]
    # return np.sum(arr)

mat = np.ones((40, 10))
pool = Pool(processes=10)
results = [pool.apply_async(sumj, args=(i, mat[i,:])) for i in range(40)]
data = [i.get() for i in results]
print(data)


# if __name__ == "__main__":



[[0, 10.0], [1, 10.0], [2, 10.0], [3, 10.0], [4, 10.0], [5, 10.0], [6, 10.0], [7, 10.0], [8, 10.0], [9, 10.0], [10, 10.0], [11, 10.0], [12, 10.0], [13, 10.0], [14, 10.0], [15, 10.0], [16, 10.0], [17, 10.0], [18, 10.0], [19, 10.0], [20, 10.0], [21, 10.0], [22, 10.0], [23, 10.0], [24, 10.0], [25, 10.0], [26, 10.0], [27, 10.0], [28, 10.0], [29, 10.0], [30, 10.0], [31, 10.0], [32, 10.0], [33, 10.0], [34, 10.0], [35, 10.0], [36, 10.0], [37, 10.0], [38, 10.0], [39, 10.0]]


In [3]:
import multiprocessing as mp
import numpy as np  


def sumj(i, arr):
    print(i)
    return np.sum(arr)

mat = np.ones((4, 10))

pool = mp.Pool(processes=4)
data = [pool.apply(sumj, args=(i, mat[i, :])) for i in range(mat.shape[0])]
data

0
1
2
3


[10.0, 10.0, 10.0, 10.0]

In [22]:
pool = mp.Pool(processes=4)
results = [pool.apply_async(cube, args=(x,))
          for x in range(1,7)]
output = [p.get() for p in results]
print(output)

[1, 8, 27, 64, 125, 216]


[LINK](https://stackoverflow.com/questions/35908987/multiprocessing-map-vs-map-async)
`map_async` is non-blocking where as `map` is blocking.


In [14]:
from multiprocessing import Pool
import time

def f(x):
    sleep(1)
    print (x*x)

if __name__ == '__main__':
    pool = Pool(processes=4)
    # pool.map(f, range(10))
    r = pool.map_async(f, range(10))
    # DO STUFF
    print ('HERE')
    print ('MORE')
    r.wait()
    print ('DONE')

HERE
MORE
9410



36251649



8164

DONE


In [1]:
from multiprocessing import Pool

def f(x):
    print(x)
    return x*x

n_job = 4
with Pool(processes=n_job) as pool:
    results = pool.map_async(f, range(10)).get()
print(results)


0123



4567



89

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [8]:
from multiprocessing import Pool
import tqdm

def f(x):
    return x*x

n_job = 4

data = []
with Pool(processes=10) as pool:
        for d in tqdm.tqdm(
                pool.map_async(f, range(10)).get(),
                total=10):
            data.append(d)
print(data)

100%|██████████| 10/10 [00:00<00:00, 143640.55it/s]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]





`pool.map(f, range(10))` will wait for all 10 of those function calls to finish so we see all the prints in a row. `r = pool.map_async(f, range(10))` will execute them asynchronously and only block when `r.wait()` is called so we see HERE and MORE in between but DONE will always be at the end.

In [26]:
import numpy as np 

def f():
    return np.random.rand()
pool = mp.Pool(processes=4)
results = [pool.apply_async(f) for _ in range(1,7)]
output = [p.get() for p in results]
print(output)

[0.48483214810476705, 0.48483214810476705, 0.48483214810476705, 0.48483214810476705, 0.6231289329357997, 0.9102145697442755]


In [2]:
import numpy as np
import multiprocessing as mp

def f(x, y):
    return x*y

pool = mp.Pool(processes=4)
results = [pool.apply_async(f, args=(x, y)) for x, y in zip(range(1,7), range(1,7))]
output = [p.get() for p in results]
print(output)


[1, 4, 9, 16, 25, 36]


In [1]:
# multiprocessing with starmap
import numpy as np
import multiprocessing as mp

def f(x, y):
    PID = os.getpid()
    return PID, x*y

pool = mp.Pool(processes=4)
results = pool.starmap(f, zip(range(1,7), range(1,7)))
print(results)


[(6996, 1), (6997, 4), (6998, 9), (6999, 16), (6996, 25), (6997, 36)]


In [2]:
import numpy as np
import multiprocessing as mp
import tqdm

def f(x, y):
    return x*y

pool = mp.Pool(processes=4)
results = [pool.apply_async(f, args=(x, y)) for x, y in zip(range(1,7), range(1,7))]
output = [p.get() for p in tqdm.tqdm(results, total=6)]
print(output)


100%|██████████| 6/6 [00:00<00:00, 53773.13it/s]

[1, 4, 9, 16, 25, 36]



