In [43]:
from dask.distributed import Client
from time import sleep
from dask import delayed
from dask import compute

import pandas as pd
import numpy as np

In [54]:
# Start a local Dask cluster with 4 workers and connect a client to it.
client = Client(n_workers=4)

In [55]:
# Display the cluster summary (dashboard address, worker/thread counts, memory).
client.cluster

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 20,Total memory: 31.02 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:32915,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 20
Started: Just now,Total memory: 31.02 GiB

0,1
Comm: tcp://127.0.0.1:45147,Total threads: 5
Dashboard: http://127.0.0.1:41683/status,Memory: 7.76 GiB
Nanny: tcp://127.0.0.1:37563,
Local directory: /tmp/dask-scratch-space/worker-hvfaocwq,Local directory: /tmp/dask-scratch-space/worker-hvfaocwq

0,1
Comm: tcp://127.0.0.1:37183,Total threads: 5
Dashboard: http://127.0.0.1:41489/status,Memory: 7.76 GiB
Nanny: tcp://127.0.0.1:33425,
Local directory: /tmp/dask-scratch-space/worker-1rm7_ohm,Local directory: /tmp/dask-scratch-space/worker-1rm7_ohm

0,1
Comm: tcp://127.0.0.1:46445,Total threads: 5
Dashboard: http://127.0.0.1:39169/status,Memory: 7.76 GiB
Nanny: tcp://127.0.0.1:34103,
Local directory: /tmp/dask-scratch-space/worker-asysf0q4,Local directory: /tmp/dask-scratch-space/worker-asysf0q4

0,1
Comm: tcp://127.0.0.1:42225,Total threads: 5
Dashboard: http://127.0.0.1:39351/status,Memory: 7.76 GiB
Nanny: tcp://127.0.0.1:45727,
Local directory: /tmp/dask-scratch-space/worker-ybb0oo7h,Local directory: /tmp/dask-scratch-space/worker-ybb0oo7h


In [4]:
def inc(x, delay=1):
    """Return ``x + 1`` after pausing to simulate a slow computation.

    Parameters
    ----------
    x : number
        Value to increment.
    delay : float, optional
        Seconds to sleep before returning. Defaults to 1, which keeps the
        timing of existing ``inc(x)`` calls unchanged; pass 0 to skip the
        artificial wait.

    Returns
    -------
    The value ``x + 1``.
    """
    # The sleep stands in for real work so that timing differences between
    # sequential and parallel execution are visible.
    sleep(delay)
    return x + 1

def add(x, y, delay=1):
    """Return ``x + y`` after pausing to simulate a slow computation.

    Parameters
    ----------
    x, y : values supporting ``+``
        Operands to add.
    delay : float, optional
        Seconds to sleep before returning. Defaults to 1, which keeps the
        timing of existing ``add(x, y)`` calls unchanged; pass 0 to skip the
        artificial wait.

    Returns
    -------
    The value ``x + y``.
    """
    # The sleep stands in for real work so that timing differences between
    # sequential and parallel execution are visible.
    sleep(delay)
    return x + y

In [5]:
%%time
# This takes three seconds to run because we call each
# function sequentially, one after the other

x = inc(1)
y = inc(2)
z = add(x, y)

CPU times: user 385 ms, sys: 56.6 ms, total: 442 ms
Wall time: 3 s


In [6]:
%%time
# This runs immediately, all it does is build a graph

x = delayed(inc)(1)
y = delayed(inc)(2)
z = delayed(add)(x, y)

CPU times: user 220 µs, sys: 150 µs, total: 370 µs
Wall time: 312 µs


In [7]:
%%time
# Trigger execution of the task graph held by `z` and return its result.
# The two inc() tasks do not depend on each other, so they can overlap;
# only add() has to wait for both.
# NOTE(review): an earlier note said this runs on a local thread pool, but this
# notebook also creates a distributed Client; with that client active the
# work presumably runs on its workers instead — confirm on the dashboard.

z.compute()

CPU times: user 275 ms, sys: 31.3 ms, total: 306 ms
Wall time: 2.04 s


5

### Parallelize a for loop

In [8]:
# Sample inputs for the loop examples: the integers 1 through 8.
data = list(range(1, 9))

In [9]:
%%time
# Sequential code

results = []
for x in data:
    y = inc(x)
    results.append(y)
    
total = sum(results)

CPU times: user 912 ms, sys: 113 ms, total: 1.03 s
Wall time: 8.01 s


In [10]:
# Show the current value of `total`.
total

44

In [11]:
%%time
# Your parallel code here...
results = []
for x in data:
    y = delayed(inc)(x)
    results.append(y)

print(results)
total = delayed(sum)(results)
print(total)
total.compute()

[Delayed('inc-c4903be8-1243-478e-8954-cb7c1890c9a6'), Delayed('inc-3b3be9fd-4480-4e84-a6f9-27877496a4ea'), Delayed('inc-e77ab70b-5b78-433e-bb28-badcee7e561c'), Delayed('inc-7c1dd197-0da5-475b-ad85-484c33afd1c1'), Delayed('inc-172887f1-550a-47b6-a7dd-b849f319523b'), Delayed('inc-3ce3b421-d2cf-413e-be1f-231bf7a1fe18'), Delayed('inc-3918d890-835b-4d77-b7bf-91bb1945fbca'), Delayed('inc-d70e4738-9122-459f-bb29-1ca8ec21659f')]
Delayed('sum-b2a910a9-3c44-470d-87f7-243f08ef7a7c')
CPU times: user 144 ms, sys: 17.5 ms, total: 161 ms
Wall time: 1.04 s


44

In [20]:
# Read the table stored under the 'df_input' key of the HDF5 dataset file.
filename = 'datasets/galadriel_dataset_24_09_18_high_res.h5'
df_input = pd.read_hdf(filename, key='df_input')

In [22]:
# Pull the 'order2' column out of the frame as a NumPy array.
in_array=df_input['order2'].to_numpy()

In [30]:
def square(x):
    """Return ``x`` raised to the second power."""
    return pow(x, 2)
def sqr(x):
    """Return the square root of ``x`` as computed by ``np.sqrt``.

    Note: despite its name, this function takes a root; it does not square.
    """
    root = np.sqrt(x)
    return root

In [52]:
%%time
x2_list=[]
for x in in_array:
    x2=square(x)
    x2=sqr(x2)
    x2_list.append(x2)

CPU times: user 26.7 ms, sys: 0 ns, total: 26.7 ms
Wall time: 26 ms


In [51]:
%%time
x2_list=[]
for x in in_array:
    x2=delayed(square)(x)
    x2=delayed(sqr)(x2)
    x2_list.append(x2)
compute(x2_list)

CPU times: user 14.1 s, sys: 421 ms, total: 14.6 s
Wall time: 14.4 s


([34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,
  34400.0,

In [58]:
# Close the Dask client now that the computations are finished.
client.close()