# Start Interactive Dask Cluster

In [None]:
pip install git+https://github.com/drabastomek/dask-cloudprovider

**RESTART YOUR KERNEL**

## Imports

Import all packages used in this notebook.

In [1]:
import os

from azureml.widgets import RunDetails
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core import Workspace, Experiment, Datastore, Dataset, Environment

## Azure ML setup

Get the workspace.

In [2]:
# Load the Azure ML workspace via Workspace.from_config() and display it as the cell output.
ws = Workspace.from_config()
ws

Workspace.create(name='dask-cloudprovider', subscription_id='6560575d-fa06-4e7d-95fb-f962e74efd7a', resource_group='cody-rg')

### Enter your name

Enter your name and virtual network information.

In [3]:
# --- base name: prefix for the compute target and environment names ---
name = 'dask'  # REPLACE

# --- virtual network settings passed to the compute target ---
vnet_rg = ws.resource_group  # replace if needed
vnet_name = 'wifi'  # replace if needed
subnet_name = 'default'  # replace if needed

# --- Azure ML resource names derived from the base name ---
ct_name = '{}-ct'.format(name)
env_name = '{}-env'.format(name)

# --- trust but verify: echo every setting so it can be checked by eye ---
# The empty strings reproduce the blank lines (and the leading/trailing newline)
# of the printed summary.
verify = '\n'.join([
    '',
    f'Name: {name}',
    '',
    f'vNET RG: {vnet_rg}',
    f'vNET name: {vnet_name}',
    f'vNET subnet name: {subnet_name}',
    '',
    f'Compute target: {ct_name}',
    f'Environment name: {env_name}',
    '',
])

print(verify)


Name: dask

vNET RG: cody-rg
vNET name: wifi
vNET subnet name: default

Compute target: dask-ct
Environment name: dask-env



### Create VM pool

Create an Azure ML VM pool on which the remote Dask cluster(s) will run.

In [4]:
# Reuse the compute target named `ct_name` if the workspace already has one;
# otherwise provision it. `ws.compute_targets` is evaluated only once here
# instead of once for the membership test and again for the lookup.
ct = ws.compute_targets.get(ct_name)

if ct is None:
    # create config for Azure ML cluster
    # change properties as needed
    config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS13_V2',  # 8 core 56 GiB 112 SSD
        min_nodes=0,
        max_nodes=100,
        vnet_resourcegroup_name=vnet_rg,
        vnet_name=vnet_name,
        subnet_name=subnet_name,
        idle_seconds_before_scaledown=300,
    )
    ct = ComputeTarget.create(ws, ct_name, config)
    # Block until provisioning has finished, streaming progress to the cell output.
    ct.wait_for_completion(show_output=True)

ct

AmlCompute(workspace=Workspace.create(name='dask-cloudprovider', subscription_id='6560575d-fa06-4e7d-95fb-f962e74efd7a', resource_group='cody-rg'), name=dask-ct, id=/subscriptions/6560575d-fa06-4e7d-95fb-f962e74efd7a/resourceGroups/cody-rg/providers/Microsoft.MachineLearningServices/workspaces/dask-cloudprovider/computes/dask-ct, type=AmlCompute, provisioning_state=Succeeded, location=westeurope, tags=None)

## Startup cluster

Start the run now. The first time, this will take a while.

### Mount Compute Instance code fileshare

This registers the compute instance code fileshare as a datastore. The default name is `code-391ff5ac-6576-460f-ba4d-7e03433c68b6`, and it uses the same credentials as the default fileshare for the workspace. It will be mounted for easy access to notebooks on the cluster.

In [5]:
codefileshare = 'codefileshare'

# Register the code fileshare as a datastore unless one with this name already
# exists. The account name and key are taken from the 'workspacefilestore' datastore.
if codefileshare not in ws.datastores:
    Datastore.register_azure_file_share(
        workspace=ws,
        datastore_name=codefileshare,
        file_share_name='code-391ff5ac-6576-460f-ba4d-7e03433c68b6',
        account_name=ws.datastores['workspacefilestore'].account_name,
        account_key=ws.datastores['workspacefilestore'].account_key,
    )

### Get data

This will get NOAA ISD Weather data which is used in the demo. If you already have data, you can skip this.

In [6]:
daskdata = 'noaa-isd-files'

# Explicit opt-in switch for the download below. It is off by default, so the
# cell is a no-op unless you set it to True. Checking it first also means the
# workspace datasets are not looked up when the download is disabled.
download_data = False

if download_data and daskdata not in ws.datasets:
    # Reference the public NOAA ISD parquet files (no validation of the paths).
    ds = Dataset.File.from_files('https://azureopendatastorage.blob.core.windows.net/isdweatherdatacontainer/ISDWeather/*/*/*.parquet', validate=False)
    # Open up /mnt so the files can be downloaded to /mnt/data/isd.
    os.system('sudo chmod 777 /mnt')
    ds.download('/mnt/data/isd')
    # Requires a datastore named 'gen2' to be registered in the workspace.
    ws.datastores['gen2'].upload('/mnt/data/isd', '/noaa-isd')
    # Register the file dataset under the name held in `daskdata`.
    ds = ds.register(ws, daskdata)

### Start cluster

This will soon be replaced.

In [7]:
from dask_cloudprovider import AzureMLCluster

In [8]:
# pip packages to add to the Azure ML environment (original order preserved)
packages = [
    'mpi4py',
    'distributed',
    'dask[complete]',
    'dask-ml[complete]',
    'fastparquet',
    'pyarrow',
    'jupyterlab',
    'joblib',
    'notebook',
    'adlfs',
    'fsspec',
    'azureml-sdk[notebooks]',
    'azureml-dataprep[fuse]',
    'lz4',
]

# Environment named after `env_name`; each package is added as a pip dependency.
env = Environment(env_name)

for package in packages:
    env.python.conda_dependencies.add_pip_package(package)

In [9]:
# Start the Dask cluster on the compute target `ct` using environment `env`,
# with Jupyter enabled and one initial node.
cluster = AzureMLCluster(
    ws,
    ct,
    env,
    jupyter=True,
    initial_nodes=1,
    # Pass every datastore of the workspace. `ws.datastores` is evaluated once
    # here instead of once more per datastore name.
    datastores=list(ws.datastores.values()),
)



############################## Setting up cluster ##############################
########################## Submitting the experiment ###########################
####################### Waiting for scheduler node's IP ########################
.......................................................................


########################## Scheduler: 10.12.0.6:8786 ###########################
############################### On the same VNET ###############################
########################### Connections established ############################
############################# Scaling to 1 workers #############################
############################### Scaling is done ################################


In [10]:
# Show the Azure ML run associated with the cluster.
cluster.run

Experiment,Id,Type,Status,Details Page,Docs Page
dask-cloudprovider,dask-cloudprovider_1581557662_6e28cfc1,azureml.scriptrun,Running,Link to Azure Machine Learning studio,Link to Documentation


In [11]:
# Scale the cluster to 10 workers.
cluster.scale(10)

In [12]:
# Display the cluster object (rendered as a widget in the notebook).
cluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …

In [None]:
from dask.distributed import Client

# Create a Dask client for the running cluster.
c = Client(cluster)

In [None]:
# Print the built-in help for AzureMLCluster for reference.
help(AzureMLCluster)

In [13]:
# Close the cluster once you are done with it.
cluster.close()

################### Scheduler and workers are disconnected. ####################
