### Create a Dask cluster for our demonstration of querying data from Snowflake in parallel

#### Set up project

In [1]:
import mlrun
import warnings
# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

project_name = "snowflake-dask"
dask_cluster_name = "snowflake-dask-cluster"

# Build the artifact path from project_name so the two cannot drift apart
# if the project is ever renamed.
mlrun.set_environment(
    project=project_name,
    artifact_path=f"v3io://projects/{project_name}/artifacts",
)

> 2022-05-17 17:03:21,416 [info] created and saved project snowflake-dask


('snowflake-dask', 'v3io://projects/snowflake-dask/artifacts')

In [2]:
# Image name shared by the build step (dc.spec.build.image) and the
# cluster spec (dc.spec.image) in the cells below.
image = ".xingsheng/snowflakedask"

#### Build an image with snowflake-connector-python. `dc.deploy()` only needs to be run once per Iguazio cluster.

In [3]:
# Declare the Dask function on top of the stock MLRun image; the extra pip
# requirements are installed when the image is built.
dc = mlrun.new_function(
    dask_cluster_name,
    kind="dask",
    image="mlrun/ml-models",
    requirements=["bokeh", "snowflake-connector-python[pandas]"],
)
dc.apply(mlrun.mount_v3io())

# Name under which the built image is stored.
dc.spec.build.image = image

In [4]:
# Optional: uncomment to inspect the full function spec before building.
# print(dc.to_yaml())

In [5]:
# Build the image; this only has to be run once per cluster.
dc.deploy(with_mlrun=False)

> 2022-05-17 17:03:41,665 [info] Started building image: .xingsheng/snowflakedask
[36mINFO[0m[0000] Retrieving image manifest mlrun/ml-models:1.0.0 
[36mINFO[0m[0000] Retrieving image mlrun/ml-models:1.0.0 from registry index.docker.io 
[36mINFO[0m[0000] Built cross stage deps: map[]                
[36mINFO[0m[0000] Retrieving image manifest mlrun/ml-models:1.0.0 
[36mINFO[0m[0000] Returning cached image manifest              
[36mINFO[0m[0000] Executing 0 build triggers                   
[36mINFO[0m[0000] Unpacking rootfs as cmd RUN python -m pip install bokeh snowflake-connector-python[pandas] requires it. 
[36mINFO[0m[0068] RUN python -m pip install bokeh snowflake-connector-python[pandas] 
[36mINFO[0m[0068] Taking snapshot of full filesystem...        
[36mINFO[0m[0101] cmd: /bin/sh                                 
[36mINFO[0m[0101] args: [-c python -m pip install bokeh snowflake-connector-python[pandas]] 
[36mINFO[0m[0101] Running: [/bin/sh -c python -m p

True

#### Set up an adaptively scaling Dask cluster
In this case, we set up an adaptive cluster with a minimum of 1 worker and a maximum of 10 workers.

In [6]:
# Adaptive scaling bounds: between 1 and 10 workers.
dc.spec.min_replicas = 1
dc.spec.max_replicas = 10

# Scheduler access: run remotely and expose it through a fixed NodePort.
dc.spec.remote = True
dc.spec.service_type = "NodePort"
dc.spec.node_port = 30066
dc.spec.scheduler_timeout = "5 days"

# Resource requests.
# NOTE(review): some MLRun versions may expect with_scheduler_requests /
# with_worker_requests on the Dask runtime instead - confirm for your version.
dc.with_requests(mem="4G", cpu="2")

# Run the cluster on the image built above.
dc.spec.image = image

In [7]:
# Accessing `dc.client` connects to the cluster's Dask scheduler (see the log output below).
client = dc.client

> 2022-05-17 17:10:23,405 [info] trying dask client at: tcp://mlrun-snowflake-dask-cluster-14563d22-6.default-tenant:8786
> 2022-05-17 17:10:23,428 [info] using remote dask scheduler (mlrun-snowflake-dask-cluster-14563d22-6) at: tcp://mlrun-snowflake-dask-cluster-14563d22-6.default-tenant:8786


In [None]:
# End the cell with the bare object so the notebook uses its rich display
# rather than the plain-text repr that print() produces.
client