# Demonstration of running Dask in a Kubeflow-enabled Kubernetes cluster

## Import

In [1]:
from dask_kubeflow.core import KubeflowCluster
import dask.array as da
from dask.distributed import Client
from time import sleep

entered import for dask_kubernetes


## Start KubeflowCluster

In [2]:
# Create the cluster handle using KubeflowCluster's defaults (no arguments are passed).
# NOTE(review): the recorded output of the next cell reports 2 requested workers,
# which is presumably the default worker count — confirm in dask_kubeflow.
cluster = KubeflowCluster()

## Wait for workers to start

In [4]:
# Wait for the requested workers; with verbose=True the recorded output shows
# periodic "wait time / requested workers / ready workers" progress lines,
# followed by the call's return value (True in the recorded run).
cluster.wait_for_workers(verbose=True)

wait time: 0 (sec), requested workers: 2, ready workers: 0
wait time: 1 (sec), requested workers: 2, ready workers: 0
wait time: 2 (sec), requested workers: 2, ready workers: 0
wait time: 3 (sec), requested workers: 2, ready workers: 0
wait time: 4 (sec), requested workers: 2, ready workers: 0
wait time: 5 (sec), requested workers: 2, ready workers: 0
wait time: 6 (sec), requested workers: 2, ready workers: 0
wait time: 7 (sec), requested workers: 2, ready workers: 0
wait time: 8 (sec), requested workers: 2, ready workers: 0
wait time: 9 (sec), requested workers: 2, ready workers: 0
wait time: 10 (sec), requested workers: 2, ready workers: 0
wait time: 20 (sec), requested workers: 2, ready workers: 0
wait time: 30 (sec), requested workers: 2, ready workers: 0
wait time: 40 (sec), requested workers: 2, ready workers: 0
wait time: 50 (sec), requested workers: 2, ready workers: 0
wait time: 60 (sec), requested workers: 2, ready workers: 0
wait time: 70 (sec), requested workers: 2, ready w

True

## Connect to the Dask scheduler

In [5]:
# Look up the scheduler endpoint once, report it, then attach a client to it.
scheduler_address = cluster.scheduler_service_address
print(f'Scheduler located at {scheduler_address}')
client = Client(scheduler_address)

Scheduler located at tcp://dask-scheduler.kubeflow-user.svc.cluster.local:8786



+-------------+----------------+---------------+---------------+
| Package     | client         | scheduler     | workers       |
+-------------+----------------+---------------+---------------+
| blosc       | None           | 1.10.2        | 1.10.2        |
| cloudpickle | 1.6.0          | 2.0.0         | 2.0.0         |
| lz4         | None           | 3.1.10        | 3.1.10        |
| msgpack     | 1.0.3          | 1.0.2         | 1.0.2         |
| numpy       | 1.21.2         | 1.20.3        | 1.20.3        |
| pandas      | 1.2.4          | 1.3.4         | 1.3.4         |
| python      | 3.8.10.final.0 | 3.9.7.final.0 | 3.9.7.final.0 |
| toolz       | 0.11.1         | 0.11.2        | 0.11.2        |
+-------------+----------------+---------------+---------------+
Notes: 
-  msgpack: Variation is ok, as long as everything is above 0.6


In [7]:
%%time
# Create a large array and calculate the mean
array = da.ones((10000, 1000, 1000), chunks=100)
print(array.mean().compute())  # Should print 1.0

1.0
CPU times: user 1.2 s, sys: 64.2 ms, total: 1.26 s
Wall time: 16.9 s


## Scale up

In [8]:
# Request 3 workers, then wait for them before continuing.
# NOTE(review): the output recorded for this cell shows scale(3) raising a
# Kubernetes ApiException (400, "name must be provided") — confirm against the
# dask_kubeflow version in use before relying on this step.
cluster.scale(3)
# Fixed typo: this line previously referenced the undefined name `cluser`,
# which would raise NameError as soon as scale() succeeded.
cluster.wait_for_workers()

worker deployment name: dask-worker, type: <class 'str'>


ApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Cache-Control': 'no-cache, private', 'Content-Type': 'application/json', 'X-Kubernetes-Pf-Flowschema-Uid': 'c3f68a01-e331-4e2f-8fb3-924df3a8a0e6', 'X-Kubernetes-Pf-Prioritylevel-Uid': '7abfb23e-dad1-40d5-80d0-f86398a62510', 'Date': 'Sun, 19 Dec 2021 19:04:29 GMT', 'Content-Length': '242'})
HTTP response body: {
  "kind": "Status",
  "apiVersion": "v1",
  "metadata": {
    
  },
  "status": "Failure",
  "message": "the name of the object (dask-worker based on URL) was undeterminable: name must be provided",
  "reason": "BadRequest",
  "code": 400
}


## Close down

In [7]:
# close down Dask client connection
client.close()

# close down the Dask cluster
cluster.close()