# Parallel job execution with dask - Using custom-defined modules

This notebook shows how to execute a function that relies on a custom-defined module, saved in `example_code/very_hard_problems.py` (relative to the notebook's working directory), on multiple remote workers in parallel.

In [None]:
import sys,os,time,dask.bag
import numpy as np
# Make the local `example_code` directory importable. os.path.join builds the
# path with the platform's separator and avoids the doubled slash that manual
# '/'-splitting produces when the working directory is the filesystem root.
sys.path.append(os.path.join(os.getcwd(), 'example_code'))
import very_hard_problems as vhp

In [None]:
from dask_kubernetes import KubeCluster
# Create the cluster object, requesting two workers; later cells refer to it as `cluster`.
cluster = KubeCluster(n_workers=2)
# Last expression of the cell: display the cluster's notebook representation.
cluster

In [None]:
from dask.distributed import Client, progress
# Connect a client to `cluster`; the following cells issue all their requests through `c`.
c = Client(cluster)
# Last expression of the cell: display the client's notebook representation.
c

In [None]:
# Custom-defined modules (file names inside `example_code/`) that are
# uploaded through the dask client.
files_to_upload = ['very_hard_problems.py']

for module_name in files_to_upload:
    module_path = 'example_code/' + module_name
    c.upload_file(module_path)

In [None]:
# Ask the client for version information with checking enabled.
# NOTE(review): presumably this flags package mismatches between the client,
# scheduler and workers -- confirm the exact `check=True` semantics in the
# dask.distributed documentation.
c.get_versions(check=True)

In [None]:
# Baseline: time a single direct call of the problem in the notebook process.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock, which can be adjusted
# while the measurement is running.
tic = time.perf_counter()
vhp.very_hard_problem(10000000)
print('Serial execution of very_hard_problem takes', time.perf_counter() - tic, 'seconds per process')

In [None]:
# Number of independent calls of very_hard_problem to run.
num_procs = 20
# One input per call: 10000000, 10000001, ..., 10000000 + num_procs - 1.
parameters = np.arange(10000000, 10000000 + num_procs)

# Wrap the inputs in a dask bag and map the function over it. Dask bags are
# lazy, so no work is done until .compute() is called below.
parameters_bag = dask.bag.from_sequence(parameters)
mapping = parameters_bag.map(vhp.very_hard_problem)

# Time the whole computation with perf_counter(), a monotonic clock meant for
# measuring elapsed intervals (time.time() follows the adjustable wall clock).
tic = time.perf_counter()
mapping.compute()
time_elapsed = time.perf_counter() - tic
# The second figure is the total time divided by the number of calls.
print('Parallel execution takes ' + str(time_elapsed) + ' seconds in total, ' + str(time_elapsed/num_procs) + ' per process')