# Parallel job execution with joblib

This notebook shows how to execute a function in parallel on multiple remote workers of a Dask cluster running on Kubernetes, first through joblib's `dask` backend and then through `dask.bag`.

In [None]:
import numpy as np
import dask,time
import dask.bag
# NOTE(review): sklearn.externals.joblib is scikit-learn's vendored copy of joblib,
# which newer scikit-learn releases no longer ship — confirm the pinned
# scikit-learn version, or switch to the standalone `import joblib`.
from sklearn.externals import joblib

In [None]:
# Create a KubeCluster asking for two workers. The bare `cluster` expression on
# the last line makes the notebook display the cluster object as cell output.
from dask_kubernetes import KubeCluster
cluster = KubeCluster(n_workers=2)
cluster

In [None]:
# Attach a dask.distributed Client to `cluster` and display it as cell output.
# NOTE(review): `progress` is imported here but not used in the visible cells.
from dask.distributed import Client, progress
c = Client(cluster)
c

In [None]:
# Query version information through the client. check=True presumably raises
# when client, scheduler and workers disagree on package versions — confirm
# against the dask.distributed documentation.
c.get_versions(check=True)

In [None]:
def very_hard_problem(zahl):
    """Add up every element of ``np.arange(zahl)`` one at a time.

    The summation is done with an explicit Python-level loop rather than a
    vectorized call, so the running time grows with ``zahl``; the notebook
    uses this as its CPU-bound workload.

    Parameters
    ----------
    zahl : number
        Upper bound (exclusive) handed to ``np.arange``.

    Returns
    -------
    The accumulated sum. When ``np.arange(zahl)`` is empty the initial
    integer ``0`` is returned unchanged.
    """
    total = 0
    for term in np.arange(zahl):
        total += term
    return total

In [None]:
# Baseline: time a single call of very_hard_problem in the notebook kernel.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() is wall-clock time and can jump when the
# system clock is adjusted, which would skew the measurement.
tic = time.perf_counter()
very_hard_problem(10000000)
print(time.perf_counter() - tic,'seconds')

In [None]:
# Number of tasks submitted in each parallel run below; it is also the divisor
# used to report the average time per task.
num_procs = 20

In [None]:
# Run num_procs independent calls of very_hard_problem through joblib while the
# 'dask' backend is active, and time the whole batch.
# time.perf_counter() is used instead of time.time() because it is monotonic
# and meant for interval measurement; wall-clock time can jump mid-run.
tic = time.perf_counter()
with joblib.parallel_backend('dask'):
    # One delayed call per input; the inputs are num_procs consecutive
    # integers starting at 10000000, so every task has nearly the same cost.
    result_list = joblib.Parallel()(
        joblib.delayed(very_hard_problem)(i)
        for i in np.arange(10000000,10000000 + num_procs)
    )
time_elapsed = time.perf_counter() - tic

# Report total elapsed time and the average per submitted task.
print(str(time_elapsed) + ' seconds in total, ' + str(time_elapsed/num_procs) + ' per process')

In [None]:
# Build a dask bag holding one input per task: num_procs consecutive integers
# starting at 10000000 (the same inputs the joblib run uses).
params_bag = dask.bag.from_sequence(np.arange(10000000,10000000 + num_procs))

In [None]:
# Apply very_hard_problem to every element of params_bag and time the batch.
# time.perf_counter() is used instead of time.time() because it is monotonic
# and meant for interval measurement; wall-clock time can jump mid-run.
tic = time.perf_counter()
# The function is passed to map() directly; wrapping it in a lambda that only
# forwards its argument adds an extra call layer without changing the result.
result_list = params_bag.map(very_hard_problem).compute()
time_elapsed = time.perf_counter() - tic

# Report total elapsed time and the average per submitted task.
print(str(time_elapsed) + ' seconds in total, ' + str(time_elapsed/num_procs) + ' per process')