# Dask Delayed - Parallelism

- Delays the execution of tasks.
- Builds a task graph
- Dask scheduler exploits the parallelism from the task graph
- Useful for custom workloads that don't fit the built-in Dask collections (Array, DataFrame, Bag)

In [None]:
!pip install memory_profiler

In [None]:
# Import Dask and its delayed interface
import dask
import dask.delayed as delayed
import time
%load_ext memory_profiler
dask.__version__

### Simple Example  

Let us consider the following mathematical steps

Increase the value by one
\begin{align}
 x & = x + 1
\end{align}
Decrease the value by one
\begin{align}
 y & = y - 1
\end{align}
Add two values
\begin{align}
 z & = x + y
\end{align}

#### Task Order
- x and y can be evaluated independently
- z value depends on the evaluation of x and y

In [None]:
# Define simple functions for increment, decrement, and add operations
sleep_time = 1
def inc(x):
    """Return ``x + 1`` after pausing for ``sleep_time`` seconds.

    The pause makes every call take a fixed, noticeable amount of time so
    that serial and parallel runs can be compared by wall-clock time.
    """
    time.sleep(sleep_time)  # fixed delay before the arithmetic
    incremented = x + 1
    return incremented

def dec(x):
    """Return ``x - 1`` after pausing for ``sleep_time`` seconds."""
    time.sleep(sleep_time)  # fixed delay before the arithmetic
    decremented = x - 1
    return decremented

def add(x, y):
    """Return ``x + y`` after pausing for ``sleep_time`` seconds."""
    time.sleep(sleep_time)  # fixed delay before the arithmetic
    combined = x + y
    return combined

Serial task execution. Each function takes one second, so executing the three functions in sequence takes three seconds. 

In [None]:
%%time
# Plain Python calls: each one blocks until it returns, so they run one
# after the other (inc first, then dec, then add).
x, y = inc(1), dec(2)
z = add(x, y)
print("computed result ", z)

Now let's build the task graph with the `dask.delayed` function. 

In [None]:
# Wrap each plain function with delayed(); calling a wrapper records a task
# in the graph instead of running the function immediately.
delayed_inc, delayed_dec, delayed_add = (delayed(fn) for fn in (inc, dec, add))

In [None]:
# Calling the wrappers only records tasks; no function body runs here.
x, y = delayed_inc(1), delayed_dec(2)
# z takes both x and y as inputs, so it can only run after they finish.
z = delayed_add(x, y)
z

In [None]:
# Draw the task graph behind z; rankdir='LR' lays it out left to right.
z.visualize(rankdir='LR')

The task-graph shows three tasks. Two of them are independent and they can be executed concurrently. So the total execution time will be two seconds. 

In [None]:
%%time
# compute() is the call that actually executes the recorded task graph.
print("computed result ", z.compute())

## Parallelizing a for loop

In [None]:
# Input values 0..9 that the loop examples below iterate over.
data = list(range(10))

In [None]:
%%time
# Serial baseline: every call blocks, so each element costs three
# sleep_time pauses (inc, dec, add) and the loop runs them back to back.
#
# The per-iteration names are deliberately NOT x, y and z: those names hold
# the Delayed objects built in the earlier graph example, and overwriting
# them with plain ints would make the z.visualize() / z.compute() cells fail
# if they are re-run after this cell.
results = []
for i in data:
    inc_i = inc(i)
    dec_i = dec(i)
    results.append(add(inc_i, dec_i))
total = sum(results)
print("computed result ", total)

In [None]:
%%time
# Same loop as the serial version, but every call is wrapped in delayed(),
# so the loop only records tasks and returns almost immediately.
results = []
for value in data:
    lazy_inc = delayed(inc)(value)
    lazy_dec = delayed(dec)(value)
    results.append(delayed(add)(lazy_inc, lazy_dec))

# The final sum is delayed as well; compute() then runs the whole graph.
total = delayed(sum)(results)
print("computed result ", total.compute())

In [None]:
# Draw the task graph for the whole loop, laid out left to right.
total.visualize(rankdir='LR')

### Dask Scheduler - Threads (Default)

In [None]:
%%time
# Record one inc/dec/add group of tasks per input value; nothing runs yet.
results = []
for value in data:
    lazy_inc = delayed(inc)(value)
    lazy_dec = delayed(dec)(value)
    results.append(delayed(add)(lazy_inc, lazy_dec))

# Run the graph with the scheduler selected explicitly as 'threads'.
total = delayed(sum)(results)
print("computed result ", total.compute(scheduler='threads'))

### Dask Scheduler - Processes

In [None]:
%%time
# Record one inc/dec/add group of tasks per input value; nothing runs yet.
results = []
for value in data:
    lazy_inc = delayed(inc)(value)
    lazy_dec = delayed(dec)(value)
    results.append(delayed(add)(lazy_inc, lazy_dec))

# Run the graph with the scheduler selected explicitly as 'processes'.
total = delayed(sum)(results)
print("computed result ", total.compute(scheduler='processes'))

### Dask Scheduler - single-threaded (for debugging)

In [None]:
%%time

# Record one inc/dec/add group of tasks per input value; nothing runs yet.
results = []
for value in data:
    lazy_inc = delayed(inc)(value)
    lazy_dec = delayed(dec)(value)
    results.append(delayed(add)(lazy_inc, lazy_dec))

# Run the graph with the 'single-threaded' scheduler, which is the one
# meant for debugging.
total = delayed(sum)(results)
print("computed result ", total.compute(scheduler='single-threaded'))

### Dask Distributed Scheduler (For scaling)

In [None]:
from dask.distributed import Client, LocalCluster
# No scheduler address is passed, so Client is expected to start a local
# cluster itself with 2 workers x 2 threads.
# NOTE(review): memory_limit is understood to apply per worker, not to the
# cluster as a whole - confirm against the dask.distributed docs.
client = Client(n_workers=2, threads_per_worker=2, memory_limit='4GB')
client 

In [None]:
%%time

# Record one inc/dec/add group of tasks per input value; nothing runs yet.
results = []
for value in data:
    lazy_inc = delayed(inc)(value)
    lazy_dec = delayed(dec)(value)
    results.append(delayed(add)(lazy_inc, lazy_dec))

# No scheduler argument here: compute() uses whatever scheduler is
# currently the default.
total = delayed(sum)(results)
print("computed result ", total.compute())

In [None]:
# Shut the client's cluster down before the next cell creates a new Client.
client.shutdown()

In [None]:
from dask.distributed import Client, LocalCluster
# Same setup as the earlier Client cell but with 8 workers instead of 2.
# NOTE(review): memory_limit is understood to apply per worker, so the
# combined limit would be 8 x 4GB - confirm this suits the machine.
client = Client(n_workers=8, threads_per_worker=2, memory_limit='4GB')
client 

In [None]:
%%time

# Record one inc/dec/add group of tasks per input value; nothing runs yet.
results = []
for value in data:
    lazy_inc = delayed(inc)(value)
    lazy_dec = delayed(dec)(value)
    results.append(delayed(add)(lazy_inc, lazy_dec))

# No scheduler argument here: compute() uses whatever scheduler is
# currently the default.
total = delayed(sum)(results)
print("computed result ", total.compute())

In [None]:
# Shut the client's cluster down now that the example is finished.
client.shutdown()