# Start Interactive Dask Cluster

In [1]:
pip install dask-cloudprovider

Note: you may need to restart the kernel to use updated packages.


**RESTART YOUR KERNEL** after the install finishes so the newly installed package can be imported.

## Imports

Import all packages used in this notebook.

In [1]:
import os

from azureml.widgets import RunDetails
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core import Workspace, Experiment, Datastore, Dataset, Environment

## Azure ML setup

Get the workspace.

In [2]:
# Obtain the workspace handle via Workspace.from_config(); every later cell
# that talks to Azure ML goes through `ws`.
ws = Workspace.from_config()
# Bare expression: display the workspace as this cell's output.
ws

Workspace.create(name='ws01ent', subscription_id='0e9bace8-7a81-4922-83b5-d995ff706507', resource_group='azureml')

## Create vnet
Create an Azure Virtual Network (vnet) in the same resource group as the Azure ML Workspace.

In [None]:
import shutil
import subprocess

# Locate the Azure CLI up front so a missing installation produces a clear
# error instead of a shell "command not found" that is easy to overlook.
az_executable = shutil.which('az')
if az_executable is None:
    raise FileNotFoundError("Azure CLI executable 'az' was not found on PATH")

# Create the vnet in the workspace's resource group. The arguments are passed
# as a list (no shell involved), so the resource group name is never re-parsed
# by a shell.
vnet_create = subprocess.run(
    [
        az_executable, 'network', 'vnet', 'create',
        '--name', 'dask-vnet',
        '--resource-group', ws.resource_group,
        '--subnet-name', 'default',
    ],
    capture_output=True,
    text=True,
)

# Surface the CLI output in the notebook rather than in the kernel's terminal.
print(vnet_create.stdout)

# Fail loudly: the cells below assume that the vnet exists.
if vnet_create.returncode != 0:
    raise RuntimeError(
        f'az network vnet create failed with exit code {vnet_create.returncode}:\n'
        f'{vnet_create.stderr}'
    )

### Create VM pool

Create an Azure ML VM pool (compute target) on which the remote Dask cluster(s) will run.

In [5]:
ct_name = "dask-vnet-ct"

if ct_name in ws.compute_targets:
    # A compute target with this name already exists in the workspace: reuse it.
    ct = ws.compute_targets[ct_name]
else:
    # Provisioning settings for a new Azure ML cluster attached to the
    # 'dask-vnet' virtual network; change properties as needed.
    config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS13_V2',  # 8 core 56 GiB 112 SSD
        min_nodes=0,
        max_nodes=100,
        idle_seconds_before_scaledown=300,
        vnet_resourcegroup_name=ws.resource_group,
        vnet_name='dask-vnet',
        subnet_name='default',
    )
    ct = ComputeTarget.create(ws, ct_name, config)
    # Block until provisioning has finished, showing progress output.
    ct.wait_for_completion(show_output=True)

# Bare expression: display the compute target as this cell's output.
ct

AmlCompute(workspace=Workspace.create(name='ws01ent', subscription_id='0e9bace8-7a81-4922-83b5-d995ff706507', resource_group='azureml'), name=dask-vnet-ct, id=/subscriptions/0e9bace8-7a81-4922-83b5-d995ff706507/resourceGroups/azureml/providers/Microsoft.MachineLearningServices/workspaces/ws01ent/computes/dask-vnet-ct, type=AmlCompute, provisioning_state=Succeeded, location=westus2, tags=None)

In [5]:
# Left commented out on purpose so it does not run with the rest of the
# notebook; uncomment to call ct.delete() on the compute target.
#ct.delete()

### Start cluster



In [6]:
from dask_cloudprovider import AzureMLCluster

In [7]:
env_name = "dask-env2"

# pip packages to register on the environment that is handed to the cluster.
# Fixed: 'dask-lightgbm' previously carried a stray leading space.
packages = ['mpi4py',
            'distributed',
            'dask[complete]',
            'dask-ml[complete]',
            'fastparquet',
            'pyarrow',
            'jupyterlab',
            'joblib',
            'notebook',
            'adlfs',
            'fsspec',
            'azureml-sdk[notebooks]',
            'dask-lightgbm',
            'dask-xgboost',
            'lightgbm',
            'xgboost',
            'pandas',
            'lz4']

# Create the named environment and add each package as a pip dependency.
env = Environment(name=env_name)

for package in packages:
    env.python.conda_dependencies.add_pip_package(package)

In [13]:
from dask_cloudprovider import AzureMLCluster

# Options for the cluster: run on the compute target `ct` using the
# environment `env`, with the notebook's chosen timeout and output settings.
cluster_options = dict(
    compute_target=ct,
    environment_definition=env,
    scheduler_idle_timeout=7200,
    jupyter=True,
    show_output=True,
)

# Start the cluster in the workspace `ws`.
cluster = AzureMLCluster(ws, **cluster_options)



############################## Setting up cluster ##############################
####################### Waiting for scheduler node's IP ########################
..............................................................

########################### Scheduler: 10.0.0.5:8786 ###########################
############################### On the same VNET ###############################
###################### Running in compute instance? True #######################
########################### Connections established ############################
############################# Scaling to 1 workers #############################
############################### Scaling is done ################################


In [15]:
# Scale the cluster to 5 workers.
# Note: this needs more than the default quota.
cluster.scale(5)

############################# Number of workers: 5 #############################


In [10]:
# Bare expression: display the cluster object (rendered as a widget in the notebook).
cluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …

In [18]:
# Show the scheduler's address.
# Remember to use this address to create your client.
cluster.scheduler_address

'tcp://10.0.0.5:8786'

In [None]:
from dask.distributed import Client
# Create a Dask client from the `cluster` object.
c = Client(cluster)
# Bare expression: display the client as this cell's output.
c

In [19]:
# Close the cluster once the work is done (the recorded output reports that
# the scheduler and workers are disconnected).
cluster.close()


################### Scheduler and workers are disconnected. ####################


In [None]:
# Print the built-in help text for AzureMLCluster (reference only; nothing
# else in the notebook depends on this cell).
help(AzureMLCluster)

In [17]:
# Bare expression: display the cluster object again.
cluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …