# Accelerated Machine Learning at Scale with NVIDIA RAPIDS on Microsoft Azure ML

## Tom Drabas (Microsoft), Manuel Reyes-Gomez (NVIDIA)

### Install dask_cloudprovider

In [2]:
!pip install --upgrade git+https://github.com/drabastomek/dask-cloudprovider

Collecting git+https://github.com/drabastomek/dask-cloudprovider
  Cloning https://github.com/drabastomek/dask-cloudprovider to /tmp/pip-req-build-6uw1vyfc
  Running command git clone -q https://github.com/drabastomek/dask-cloudprovider /tmp/pip-req-build-6uw1vyfc
Collecting aiobotocore>=0.10.2 (from dask-cloudprovider==0.1.1+206.gd708ee2)
[?25l  Downloading https://files.pythonhosted.org/packages/8c/eb/53443d088023f016ba076d23b8aa7d223bed7938e2eba4d1c06f52b3c0eb/aiobotocore-1.0.1-py3-none-any.whl (40kB)
[K     |████████████████████████████████| 40kB 13.6MB/s eta 0:00:01
Collecting botocore<1.15.33,>=1.15.32 (from aiobotocore>=0.10.2->dask-cloudprovider==0.1.1+206.gd708ee2)
[?25l  Downloading https://files.pythonhosted.org/packages/49/86/6448bb5ab4b0c169f379fce589e568e798907b569eaeb012c720a4dd9ca2/botocore-1.15.32-py2.py3-none-any.whl (6.0MB)
[K     |████████████████████████████████| 6.1MB 8.1MB/s eta 0:00:01
[?25hCollecting aioitertools>=0.5.1 (from aiobotocore>=0.10.2->dask-clou

### Imports

In [3]:
import azureml_setup
from dask_cloudprovider import AzureMLCluster

### Setup

#### Workspace

Documentation: [Workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py)

In [4]:
# --- Per-attendee settings: CHANGE these three values ---
workshop_id = 53
workspace_name = 'GTC2020EuropeWest'
vnet_name = 'gtc2020-vnet-westeu'

# --- Shared workshop settings: DO NOT CHANGE anything below ---
tenant_id = '43083d15-7273-40c1-b7db-39efd9ccc17a'
subscription_id = '04c74f0f-e88d-4b6e-83bb-1242060caec7'
resource_group = 'GTC2020Workshop'

In [5]:
# Obtain the workspace handle through the azureml_setup helper, using the
# identifiers configured in the previous cell. In the recorded run this step
# prompted for interactive (device-login) authentication.
ws = azureml_setup.get_workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
    tenant_id=tenant_id,
)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code AWH2KLRHM to authenticate.
Interactive authentication successfully completed.


#### Parameters

In [6]:
########################################################
### Derived settings -- DO NOT CHANGE ANYTHING BELOW
########################################################

# Lower-cased, zero-padded prefix shared by all resource names,
# e.g. workshop_id 53 -> 'gtc20-053'.
name = 'GTC20-{:03d}'.format(workshop_id).lower()

### vnet settings
subnet_name = 'default'
vnet_rg = ws.resource_group  # the vNET resource group is taken from the workspace

### azure ml names
ct_name = name + '-ct'
exp_name = name + '-workshop'

### trust but verify
# Assemble the summary line by line; the leading and trailing empty entries
# reproduce the blank line before and after the printed report.
verify = '\n'.join([
    '',
    f'Name: {name}',
    '',
    f'vNET RG: {vnet_rg}',
    f'vNET name: {vnet_name}',
    f'vNET subnet name: {subnet_name}',
    '',
    f'Compute target: {ct_name}',
    f'Experiment name: {exp_name}',
    '',
])

print(verify)


Name: gtc20-053

vNET RG: GTC2020Workshop
vNET name: gtc2020-vnet-westeu
vNET subnet name: default

Compute target: gtc20-053-ct
Experiment name: gtc20-053-workshop



#### Compute target
Documentation: [Compute target](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.amlcompute%28class%29?view=azure-ml-py)

In [7]:
# Obtain the compute target through the azureml_setup helper: STANDARD_ND6S
# VMs with min_nodes == max_nodes == 2 (a fixed-size pool), attached to the
# workshop vNET/subnet configured above.
ct = azureml_setup.get_compute_target(
    ws,
    ct_name,
    vm_name='STANDARD_ND6S',
    min_nodes=2,
    max_nodes=2,
    vnet_rg=vnet_rg,
    vnet_name=vnet_name,
    subnet_name=subnet_name,
)

Creating
Succeeded..................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


#### Environment
Documentation: [Environment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment?view=azure-ml-py)

In [8]:
# Look up the environment named 'gtc20_GPU_ENV' via the azureml_setup helper;
# it is passed to the Dask cluster below as its environment definition.
env = azureml_setup.get_environment(
    ws,
    environment_name='gtc20_GPU_ENV',
)

### Create Azure ML Dask cluster

In [12]:
# Start the Dask cluster on Azure ML using the workspace, compute target and
# environment prepared in the cells above.
amlcluster = AzureMLCluster(
    workspace=ws,
    compute_target=ct,
    initial_node_count=2,
    experiment_name=exp_name,
    environment_definition=env,
    scheduler_idle_timeout=2 * 60 * 60,  # 7200 s: close the cluster after 2h if not used
    datastores=[ws.datastores['datafileshare']],
)

############################## Setting up cluster ##############################
########################## Submitting the experiment ###########################
####################### Waiting for scheduler node's IP ########################
................


########################## Scheduler: 10.2.0.54:8786 ###########################
############################### On the same VNET ###############################
########################### Connections established ############################
############################# Scaling to 2 workers #############################
############################### Scaling is done ################################


In [13]:
# Bare last expression: shows the cluster's rich (widget) representation as the cell output.
amlcluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …

In [None]:
# Close the cluster when finished with the workshop.
# NOTE(review): presumably this also ends the Azure ML run backing the cluster -- confirm
# against the dask_cloudprovider documentation.
amlcluster.close()