In [2]:
import numpy as np
import pandas as pd
import multiprocessing as mp

In [3]:
r = np.random.normal(0, .01, size=(1000, 10000))
num_threads = 24

In [4]:
def barrier_touch(r, width=.5):
    t = dict()
    p = np.log((1 + r).cumprod(axis=0))
    for j in range(r.shape[1]):
        for i in range(r.shape[0]):
            if p[i, j] >= width or p[i, j] <= -width:
                t[j] = i
                continue
    return t

In [5]:
%%time
parts = np.linspace(0, r.shape[0], min(num_threads, r.shape[0]) + 1)
parts = np.ceil(parts).astype(int)
jobs = []
for i  in range(1, len(parts)):
    jobs.append(r[:, parts[i-1]:parts[i]])
pool = mp.Pool(processes=num_threads)
out = []
outputs = pool.imap_unordered(barrier_touch, jobs)
for out_ in outputs:
    out.append(out_)
pool.close()
pool.join()

CPU times: user 25.3 ms, sys: 83.4 ms, total: 109 ms
Wall time: 318 ms


In [6]:
%%time
out = barrier_touch(r)

CPU times: user 4.53 s, sys: 84.2 ms, total: 4.61 s
Wall time: 4.61 s


In [7]:
def lin_parts(num_atoms, num_threads):
    parts = np.linspace(0, num_atoms, min(num_threads, num_atoms) + 1)
    parts = np.ceil(parts).astype(int)
    return parts

In [8]:
import pandas as pd
import numpy as np

def mp_pandas_obj(func, pd_obj, num_threads, mp_batches=1,
                  lin_mols=True, **kwargs):
    if lin_mols:
        parts = lin_parts(len(pd_obj[1]), num_threads * mp_batches)
    else:
        parts = nested_parts(len(pd_obj[1]), num_threads, mp_batches)
    jobs = []
    for i in range(1, len(parts)):
        job = {pd_obj[0]: pd_obj[1][parts[i-1]: parts[i]], 'func': func}
        job.update(kwargs)
        jobs.append(job)
    if num_threads == 1:
        out = process_jobs_(jobs)
    else:
        out = process_jobs_(jobs, num_threads=num_threads)
    if isinstance(out, pd.DataFrame):
        df0 = pd.DataFrame()
    elif isinstance(out, pd.Series):
        df0 = pd.Series()
    else:
        return out
    for i in out:
        df0 = df0.append(i)
    df0 = df0.sort_index()
    return df0