# Dask gateway workload

Connect to the Dask Gateway and inspect its clusters and cluster options:

In [1]:
import dask_gateway
import dask.distributed

# NOTE(review): LoopRunner and format_bytes are not referenced in the visible
# cells — confirm whether this import is still needed.
from distributed.utils import LoopRunner, format_bytes


In [2]:
# Create the client-side handle to the Dask Gateway server.
# NOTE(review): "http://..." looks like a placeholder address — replace it
# with the real gateway URL before running this cell.
gateway = dask_gateway.Gateway(
    "http://...",
)

In [3]:
gateway.list_clusters()

[]

In [4]:
gateway.cluster_options()

VBox(children=(HTML(value='<h2>Cluster Options</h2>'), GridBox(children=(HTML(value="<p style='font-weight: bo…

Create a Dask Gateway cluster:

In [5]:
%%time
# Ask the gateway for a new cluster, with the `worker_cores` option set to 1.
# Note: this might take up to a few minutes because of Docker image provisioning.
cluster = gateway.new_cluster(worker_cores=1)

CPU times: user 37.8 ms, sys: 4.59 ms, total: 42.3 ms
Wall time: 10.9 s


In [6]:
gateway.list_clusters()

[ClusterReport<name=dask-gateway-1.a6662ace9653458894c871dd7bff05b7, status=RUNNING>]

In [7]:
cluster.scale(1)  # scale the cluster to a single worker

Connect a local client process to the cluster:

In [8]:
client = dask.distributed.Client(cluster)

In [None]:
client

Specify a Dask workload:

In [10]:
import numpy, dask.array

In [11]:
def calculate_pi(size_in_terabytes, chunk_size_in_megabytes):
    """Estimate pi with a Monte Carlo method on a lazily evaluated Dask array.

    Uniform random (x, y) points are drawn from the unit square; the fraction
    of points whose squared distance from the origin is below 1 is multiplied
    by 4 to give the estimate.

    Parameters
    ----------
    size_in_terabytes : float
        Total size of the random sample array, in terabytes (1e12 bytes).
        Must be positive and large enough to hold at least one (x, y) point.
    chunk_size_in_megabytes : float
        Target size of a single chunk (one task), in megabytes (1e6 bytes).
        Must be positive.

    Returns
    -------
    dask array
        Lazy scalar holding the estimate; call ``.compute()`` to evaluate it.

    Raises
    ------
    ValueError
        If either size is not positive, or if the total size is too small to
        hold a single (x, y) point.
    """
    if size_in_terabytes <= 0 or chunk_size_in_megabytes <= 0:
        raise ValueError(
            "size_in_terabytes and chunk_size_in_megabytes must be positive, "
            f"got {size_in_terabytes!r} and {chunk_size_in_megabytes!r}"
        )

    # One point is two samples; the division by 8 assumes 8-byte (float64) samples.
    number_of_points = int(size_in_terabytes * 1e12 / 8 / 2)
    if number_of_points < 1:
        raise ValueError(
            f"size_in_terabytes={size_in_terabytes!r} is too small to hold "
            "a single (x, y) point"
        )
    total_array_size = (number_of_points, 2)

    number_of_tasks = (size_in_terabytes * 1e12) / (chunk_size_in_megabytes * 1e6)

    # Clamp the rows per chunk to [1, number_of_points]: a chunk smaller than
    # one point would otherwise round down to a zero-length chunk, and a chunk
    # larger than the whole array is just the whole array.
    points_per_chunk = int(number_of_points / number_of_tasks)
    points_per_chunk = min(max(points_per_chunk, 1), number_of_points)
    array_chunk_size = (points_per_chunk, 2)

    xy = dask.array.random.uniform(
        low=0.0, high=1.0,
        size=total_array_size,
        chunks=array_chunk_size
    )

    # A point lies inside the unit quarter circle when x**2 + y**2 < 1.
    xy_inside_circle = (xy ** 2).sum(axis=1) < 1

    pi = 4 * xy_inside_circle.mean()

    return pi

In [12]:
# Total size of the random sample array (0.001 TB = 1 GB).
workload_size_in_terabytes = 0.001
# Size of one chunk, i.e. one task. Should scale with ~memory per CPU,
# so it is specific to the Dask workers in use.
workload_chunk_size_in_megabytes = 20
print(f"there will be {(workload_size_in_terabytes*1e12)/(workload_chunk_size_in_megabytes*1e6)} chunks to process")

there will be 50.0 chunks to process


In [13]:
# Build the pi estimate as a lazy Dask expression; nothing is evaluated
# until `.compute()` is called on it.
pi = calculate_pi(
    size_in_terabytes=workload_size_in_terabytes,
    chunk_size_in_megabytes=workload_chunk_size_in_megabytes
)

In [14]:
%%time
# Evaluate the lazy pi estimate and time the evaluation.
# NOTE(review): presumably runs on the gateway cluster through the client
# connected earlier — confirm the client is still active.
pi.compute()

CPU times: user 38.1 ms, sys: 3.84 ms, total: 41.9 ms
Wall time: 9.81 s


3.141701952

Code taken from:
* https://github.com/ExaESM-WP4/Dask-scheduling-scenarios/blob/ed61481965b9a6faee2e97437a436f75c20b86a9/define-pi-workload.py
* https://github.com/ExaESM-WP4/Dask-scheduling-scenarios/blob/ed61481965b9a6faee2e97437a436f75c20b86a9/02_Fixed-jobqueue-cluster.ipynb

Python environment:

In [15]:
pip list

Package                       Version
----------------------------- --------------------------
adal                          1.2.7
adlfs                         2021.7.1
affine                        2.3.0
aiobotocore                   1.3.3
aiohttp                       3.7.4.post0
aioitertools                  0.7.1
alembic                       1.6.5
amqp                          5.0.6
anyio                         3.2.1
appdirs                       1.4.4
argon2-cffi                   20.1.0
asciitree                     0.3.3
asgiref                       3.4.1
asn1crypto                    1.4.0
astropy                       4.2.1
async-generator               1.10
async-timeout                 3.0.1
attrs                         21.2.0
av                            8.0.3
awscli                        1.19.106
azure-core                    1.14.0
azure-datalake-store          0.0.51
azure-identity                1.6.0
azure-storage-blob            12.8.1
Babel                    

In [16]:
!conda list --explicit

# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/gh-1.12.1-ha8f183a_1.tar.bz2
https://conda.anaconda.org/conda-forge/noarch/nomkl-1.0-h5ca1d4c_0.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2021.5.30-ha878542_0.tar.bz2
https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2
https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2
https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2
https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2
https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_14.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2