# `dask.delayed`: processes vs threads

Here we use a (quite inefficient) pure-Python implementation of the Euclidean distance matrix to understand how `dask.delayed` behaves with Python code. Remember that, previously, what we ran with `dask.delayed` was SciPy's `cdist` function.

In [None]:
import dask
import numpy as np

In [None]:
def euclidean_distance_matrix(x, y):
    """Return the pairwise Euclidean distances between the rows of x and y.

    The implementation intentionally uses explicit Python loops (rather than
    a vectorized expression) so that it serves as a slow, pure-Python
    workload for the scheduler comparison in this notebook.

    Parameters
    ----------
    x : np.ndarray
        2-D array whose rows are the first set of points.
    y : np.ndarray
        2-D array whose rows are the second set of points; each row must be
        subtractable from a row of ``x`` (same number of columns).

    Returns
    -------
    np.ndarray
        Array of shape ``(x.shape[0], y.shape[0])`` where entry ``[i, j]`` is
        the Euclidean distance between ``x[i]`` and ``y[j]``.
    """
    # Size the output from both inputs so x and y may have different lengths.
    dist_matrix = np.empty((x.shape[0], y.shape[0]))
    for i, xi in enumerate(x):
        for j, yj in enumerate(y):
            diff = xi - yj
            # Euclidean distance: square root of the sum of squared differences.
            dist_matrix[i, j] = np.sqrt((diff ** 2).sum())
    return dist_matrix

In [None]:
# Random input array with 1000 rows and 50 columns.
x = np.random.random([1000, 50])

In [None]:
%%time
# Time one direct call (no Dask involved) as a reference point.
edm = euclidean_distance_matrix(x, x)

In [None]:
# Build 12 independent delayed tasks, each wrapping the same
# `euclidean_distance_matrix(x, x)` call. Nothing is computed here; the
# tasks are only evaluated when passed to `dask.compute`.
graph = [
    dask.delayed(euclidean_distance_matrix)(x, x)
    for _ in range(12)
]

In [None]:
%%time
# Evaluate every task in `graph` with Dask's thread-based scheduler.
edm = dask.compute(graph, scheduler='threads')

In [None]:
%%time
# Evaluate every task in `graph` with Dask's process-based scheduler.
edm = dask.compute(graph, scheduler='processes')