In [9]:
%autosave 10
import numpy as np
import multiprocessing
import multiprocessing.pool
from threading import Thread
from functools import partial

# Number of CPUs as reported by multiprocessing.
n_cpu = multiprocessing.cpu_count()
# Number of tasks in each benchmark run: eight for every CPU.
repeat = 8 * n_cpu

Autosaving every 10 seconds


In [5]:
from time import sleep

def f():
    """Sleep for one second, then print ``hello``."""
    sleep(1)
    print('hello')

# Run f on a separate thread. The main thread continues immediately, which is
# why the recorded output shows 'world' before 'hello'.
t = Thread(target=f)
t.start()
print('world')
t.join()  # wait for the thread to finish before the cell ends

world
hello


In [10]:
# Standard-normal samples; np.linalg.eigvals treats the last two axes as a
# stack of 5x5 matrices.
A = np.random.normal(0, 1, (64, 64, 5, 5))

def f(x):
    """Return the norm of all eigenvalues of ``A`` after adding ``x`` to ``A[0, 0]``.

    The module-level ``A`` is not modified; the shift is applied to a copy.

    Parameters
    ----------
    x : scalar or array broadcastable to ``A[0, 0]``
        Value added to the first 5x5 matrix of the stack.

    Returns
    -------
    float
        2-norm of the flattened eigenvalue array.
    """
    # A distinct local name is required: assigning to ``A`` inside the function
    # would make ``A`` local, and reading it in ``A.copy()`` would then raise
    # UnboundLocalError.
    shifted = A.copy()
    shifted[0, 0] += x
    vals = np.linalg.eigvals(shifted)
    return np.linalg.norm(vals)

In [11]:
%%timeit

# Sequential baseline: call f once for each of the `repeat` inputs.
for x in range(repeat):
    f(x)

2.3 s ± 33.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit

# Same workload mapped over a thread pool created with its default size.
with multiprocessing.pool.ThreadPool() as pool:
    pool.map(f, range(repeat))

663 ms ± 51.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
class Eigen:
    """Holds a random stack of 5x5 matrices and measures shifted eigenvalues."""

    def __init__(self):
        """Draw the matrix stack ``A`` from a standard normal distribution."""
        self.A = np.random.normal(loc=0, scale=1, size=(64, 64, 5, 5))

    def f(self, x):
        """Return the norm of the eigenvalues after adding ``x`` to ``A[0, 0]``.

        ``self.A`` itself is left unchanged; the shift is applied to a copy.
        """
        perturbed = self.A.copy()
        perturbed[0, 0] += x
        return np.linalg.norm(np.linalg.eigvals(perturbed))


e = Eigen()

In [18]:
# Sequential baseline for the class-based version: `repeat` calls of e.f.
%timeit [e.f(x) for x in range(repeat)]

2.26 s ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
class ThreadEigen(Eigen):
    """Eigen subclass that evaluates ``f`` for many inputs using plain threads."""

    def __init__(self, n_threads=multiprocessing.cpu_count()):
        """Initialise the Eigen state and store the worker-thread count.

        Parameters
        ----------
        n_threads : int
            Upper bound on the number of threads ``f_many`` starts. The
            default is evaluated once, when the class is defined.
        """
        super().__init__()
        self.n_threads = n_threads

    def worker(self, data, result):
        """Pop inputs off ``data`` until it is empty, appending ``f(x)`` to ``result``.

        Both lists are modified in place and results are appended in
        completion order. ``f_many`` no longer uses this method; it is kept
        so existing callers keep working.
        """
        while True:
            try:
                x = data.pop()
            except IndexError:
                # Another thread may have taken the last item; empty means done.
                break
            result.append(self.f(x))

    def _indexed_worker(self, pending, out):
        """Pop ``(index, x)`` pairs off ``pending`` and store ``f(x)`` at ``out[index]``."""
        while True:
            try:
                index, x = pending.pop()
            except IndexError:
                break
            out[index] = self.f(x)

    def f_many(self, data):
        """Return ``[self.f(x) for x in data]`` computed on worker threads.

        Unlike a bare ``worker`` loop, ``data`` is only read (the caller's
        list is not emptied) and the results are in input order.

        Parameters
        ----------
        data : iterable
            Inputs to pass to ``f``.

        Returns
        -------
        list
            One result per input, at the same position as its input.
        """
        items = list(data)
        out = [None] * len(items)
        # Reversed so that pop() from the end hands inputs out in input order.
        pending = list(enumerate(items))[::-1]

        run = partial(self._indexed_worker, pending=pending, out=out)

        # Never start more threads than there are inputs, but always at least
        # one so that every slot of ``out`` gets filled.
        n_workers = max(1, min(self.n_threads, len(items)))
        threads = [Thread(target=run) for _ in range(n_workers)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        return out

te = ThreadEigen()

In [24]:
# Time the hand-rolled thread workers on the same `repeat` inputs.
%timeit te.f_many(list(range(repeat)))

726 ms ± 32.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
def f(n=1_000_000):
    """Draw ``n`` standard-normal samples and return the first one."""
    return np.random.normal(0, 1, n)[0]

def long_str(n, s='a'):
    """Double ``s`` ``n`` times and return the length of the result.

    The doubled string is actually built on every round; the function serves
    as a pure-Python workload for the timing cells.
    """
    doubled = s
    for _ in range(n):
        doubled = doubled + doubled
    return len(doubled)

In [29]:
# Sequential baseline: `repeat` calls of f with its default argument.
%timeit [f() for _ in range(repeat)]

3.2 s ± 6.37 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
%%timeit

# Same sampling workload mapped over a thread pool; every task gets n = 1_000_000.
with multiprocessing.pool.ThreadPool() as pool:
    pool.map(f, [1_000_000] * repeat)

3.31 s ± 2.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
# Sequential baseline: `repeat` calls of long_str(27).
%timeit [long_str(27) for _ in range(repeat)]

17.6 s ± 328 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [33]:
%%timeit

# Same string workload mapped over a thread pool.
with multiprocessing.pool.ThreadPool() as pool:
    pool.map(long_str, [27] * repeat)

15.3 s ± 115 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
%%timeit

# Sampling workload mapped over a process pool instead of threads.
with multiprocessing.Pool() as pool:
    result = pool.map(f, [1_000_000] * repeat)

620 ms ± 1.25 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [36]:
%%timeit

# String workload mapped over a process pool instead of threads.
with multiprocessing.Pool() as pool:
    result = pool.map(long_str, [27] * repeat)

3.24 s ± 3.02 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [40]:
import pickle

# Round trip of a plain list through pickle.
b = pickle.dumps([1, 2, 3, 4])
print(pickle.loads(b))

# A list that contains itself survives the round trip as well.
a = [1, 2, 3]
a += [a]
b = pickle.dumps(a)
print(pickle.loads(b))

[1, 2, 3, 4]
[1, 2, 3, [...]]


In [41]:
# A lambda cannot be pickled. The failure is the point of this cell, so it is
# caught and displayed instead of aborting a Restart & Run All.
try:
    pickle.dumps(lambda x: x**2)
except (pickle.PicklingError, AttributeError) as err:
    print(f'{type(err).__name__}: {err}')

PicklingError: Can't pickle <function <lambda> at 0x7f153cb90d08>: attribute lookup <lambda> on __main__ failed

In [43]:
def func_gen(x):
    """Return a zero-argument function that prints ``x`` when called."""
    def f():
        print(x)
    
    return f

func = func_gen(1)
# A function defined inside another function cannot be pickled. The failure is
# the point of this cell, so it is caught and displayed instead of aborting a
# Restart & Run All.
try:
    pickle.dumps(func)
except (pickle.PicklingError, AttributeError) as err:
    print(f'{type(err).__name__}: {err}')

AttributeError: Can't pickle local object 'func_gen.<locals>.f'

In [44]:
# A module-level function does pickle: the recorded bytes contain only the
# module and function name (__main__, func_gen), not the function's code.
pickle.dumps(func_gen)

b'\x80\x03c__main__\nfunc_gen\nq\x00.'

In [49]:
# A partial wrapping a picklable callable can be handed to a process pool.
with multiprocessing.Pool() as pool:
    func = partial(np.power, 3)  # func(k) is np.power(3, k)
    result = pool.map(func, np.arange(10))
print(result)

[1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683]


In [52]:
a = []

def f(x):
    """Append ``x`` to the module-level list ``a`` and return the sum of ``a``."""
    a.append(x)
    return sum(a)

# The calls run in pool worker processes. The recorded output shows the effect:
# the returned sums differ from task to task, while the `a` printed here in the
# parent is still empty.
with multiprocessing.Pool() as pool:
    result = pool.map(f, [1]*16)
print(result)
print(a)

[1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 3, 3]
[]


In [None]:
# NOTE(review): bare name in a cell that was never executed. `joblib` is not
# imported in the visible cells, so running this raises NameError; presumably
# a placeholder for a joblib section. Confirm, then complete or remove.
joblib