# Pool

`Pool` is an object that automatically creates a specified number of worker processes and applies a function to the given inputs in those processes.

In [1]:
import numpy as np
from math import sqrt
from random import random
from multiprocessing import Pool

In [2]:
# Fixed seed so that Restart Kernel -> Run All regenerates exactly the same data.
SEED = 0
sample_size = 1_000_000
features_size = 10

# Dedicated generator instead of the unseeded global numpy RNG.
rng = np.random.default_rng(SEED)

# X: `sample_size` points with `features_size` coordinates each.
X = rng.normal(size=(sample_size, features_size))
# x: the single point that every row of X is compared against.
x = rng.normal(size=features_size)

In [3]:
def compute_distances(X, a):
    """Return the Euclidean distance from point ``a`` to every row of ``X``.

    Implemented with plain Python loops (no vectorisation).

    Parameters
    ----------
    X : iterable of sequences
        Collection of points; each item is iterated coordinate by coordinate.
    a : sequence
        Reference point.

    Returns
    -------
    list of float
        One distance per item of ``X``, in iteration order.
    """
    distances = []
    for row in X:
        # zip pairs coordinates positionally and stops at the shorter sequence.
        squared_diffs = [(p - q) ** 2 for p, q in zip(a, row)]
        distances.append(sqrt(sum(squared_diffs)))
    return distances

In [4]:
%%timeit -n 1
# Baseline: time the single-process distance computation over the whole of X.
compute_distances(X, x)

7.67 s ± 131 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Cut X into 5 chunks along its first axis; 5 matches the number of worker
# processes requested from Pool later in the notebook.
split = np.array_split(X, 5)

In [6]:
%%timeit -n 1
# Time a single chunk on its own, as a reference point for the parallel run.
compute_distances(split[0], x)

1.55 s ± 50.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## map

In [7]:
def wrapper(X):
    """Compute the distances from the notebook-level point ``x`` to each row of ``X``.

    Single-argument adapter around ``compute_distances`` so it can be handed
    to ``pool.map``, which calls the function with one item at a time.
    """
    distances = compute_distances(X, x)
    return distances

In [8]:
%%timeit -n 1
pool = Pool(processes=5)
pool.map(wrapper , split)
pool.close()
pool.join()

2.74 s ± 32.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
