# Start Interactive Dask Cluster

In [1]:
pip install dask-cloudprovider

Collecting dask-cloudprovider
  Downloading dask_cloudprovider-0.4.1-py3-none-any.whl (33 kB)
Collecting aiobotocore>=0.10.2
  Downloading aiobotocore-1.1.1-py3-none-any.whl (45 kB)
[K     |████████████████████████████████| 45 kB 2.5 MB/s  eta 0:00:01
[?25hCollecting dask>=2.2.0
  Downloading dask-2.27.0-py3-none-any.whl (843 kB)
[K     |████████████████████████████████| 843 kB 19.6 MB/s eta 0:00:01
[?25hCollecting distributed>=2.3.1
  Downloading distributed-2.27.0-py3-none-any.whl (652 kB)
[K     |████████████████████████████████| 652 kB 44.5 MB/s eta 0:00:01
Collecting botocore<1.17.45,>=1.17.44
  Downloading botocore-1.17.44-py2.py3-none-any.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 50.5 MB/s eta 0:00:01
[?25hCollecting aioitertools>=0.5.1
  Downloading aioitertools-0.7.0-py3-none-any.whl (20 kB)
Collecting toolz>=0.8.2
  Downloading toolz-0.11.1-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 3.8 MB/s  eta 0:00:01
[?25hCollecti

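**Restart your kernel** after the installation above finishes, so that the newly installed packages are picked up before you run the remaining cells.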

## Imports

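Import the standard-library and Azure ML packages used in this notebook. The Dask-related imports appear further down, in the cells where they are first needed.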

In [1]:
import os

from azureml.widgets import RunDetails
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core import Workspace, Experiment, Datastore, Dataset, Environment

## Azure ML setup

Get the workspace.

In [2]:
# Load the Azure ML workspace via Workspace.from_config()
# (presumably reads a local workspace config file — confirm for your setup).
ws = Workspace.from_config()
# Bare expression on the last line: displays the workspace as the cell output.
ws

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code FUA4B672G to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


Workspace.create(name='qtmNpeMlWu2Ws0', subscription_id='6699340d-f248-4d52-8e5c-d0f06713b006', resource_group='qtmNpeMlWu2Rg0')

### Create VM pool

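Create an Azure ML compute target (a pool of VMs) on which the remote Dask cluster(s) will run. If a compute target with the chosen name already exists, it is reused.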

In [3]:
# Name of the Azure ML compute target that backs the Dask cluster.
ct_name = "dask-vnet-ct"

# Read the workspace's compute targets once and reuse the result for both the
# existence check and the retrieval, instead of querying the property twice.
existing_targets = ws.compute_targets

if ct_name in existing_targets:
    # A compute target with this name already exists: reuse it.
    ct = existing_targets[ct_name]
else:
    # Create the config for the Azure ML cluster; change properties as needed.
    config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_DS13_V2",  # 8 core 56 GiB 112 SSD
        min_nodes=0,
        max_nodes=100,
        vnet_resourcegroup_name=ws.resource_group,
        # NOTE: environment-specific value; replace with the name of your own VNet.
        vnet_name="testasa8a486745f283",
        subnet_name="default",
        idle_seconds_before_scaledown=300,
    )
    ct = ComputeTarget.create(ws, ct_name, config)
    # Block until provisioning has finished, streaming progress to the output.
    ct.wait_for_completion(show_output=True)

# Display the compute target as the cell output.
ct

AmlCompute(workspace=Workspace.create(name='qtmNpeMlWu2Ws0', subscription_id='6699340d-f248-4d52-8e5c-d0f06713b006', resource_group='qtmNpeMlWu2Rg0'), name=dask-vnet-ct, id=/subscriptions/6699340d-f248-4d52-8e5c-d0f06713b006/resourceGroups/qtmNpeMlWu2Rg0/providers/Microsoft.MachineLearningServices/workspaces/qtmNpeMlWu2Ws0/computes/dask-vnet-ct, type=AmlCompute, provisioning_state=Succeeded, location=westus2, tags=None)

In [5]:
# Cleanup (disabled by default): uncomment and run this line to delete the
# compute target `ct` once it is no longer needed.
#ct.delete()

### Start cluster



In [4]:
from dask_cloudprovider import AzureMLCluster

In [5]:
# Name given to the Azure ML environment created below.
env_name = "dask-env2"

# pip packages to add to the environment.
# Fix: 'dask-lightgbm' previously carried a stray leading space
# (' dask-lightgbm'), which is not a well-formed requirement string.
packages = ['mpi4py',
            'distributed',
            'dask[complete]',
            'dask-ml[complete]',
            'fastparquet',
            'pyarrow',
            'jupyterlab',
            'joblib',
            'notebook',
            'adlfs',
            'fsspec',
            'azureml-sdk[notebooks]',
            'dask-lightgbm',
            'dask-xgboost',
            'lightgbm',
            'xgboost',
            'pandas',
            'lz4']

env = Environment(name=env_name)

# Register every package as a pip dependency of the environment.
for package in packages:
    env.python.conda_dependencies.add_pip_package(package)

In [6]:
from dask_cloudprovider import AzureMLCluster

# Keyword options for the cluster, gathered in one place so they are easy to
# review and tweak before the cluster is started.
cluster_options = {
    "compute_target": ct,            # compute target created/reused earlier
    "environment_definition": env,   # environment with the pip packages
    "scheduler_idle_timeout": 7200,  # presumably seconds (i.e. 2 hours) — confirm
    "jupyter": True,
    "show_output": True,
}

# Start the cluster in the workspace `ws` with the options above.
cluster = AzureMLCluster(ws, **cluster_options)

############################## Setting up cluster ##############################




####################### Waiting for scheduler node's IP ########################
..........................................................

########################### Scheduler: 10.0.0.5:8786 ###########################
############################### On the same VNET ###############################
###################### Running in compute instance? True #######################
########################### Connections established ############################
############################# Scaling to 1 workers #############################
############################### Scaling is done ################################


In [7]:
# Ask the cluster to scale to 25; this needs more than the default quota.
cluster.scale(25)



In [8]:
# Display the cluster object; it renders as a widget in the cell output.
cluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …

In [9]:
# Show the address of the cluster's scheduler.
cluster.scheduler_address

'tcp://10.0.0.5:8786'



In [None]:
# Create a Dask distributed Client from the cluster object.
from dask.distributed import Client
c = Client(cluster)
# Bare expression on the last line: displays the client as the cell output.
c

In [None]:
# Close the cluster when you are finished with it.
cluster.close()

In [None]:
# Print the built-in help text for AzureMLCluster (reference only).
help(AzureMLCluster)