# Accelerated Machine Learning at Scale with NVIDIA RAPIDS on Microsoft Azure ML

## Tom Drabas (Microsoft), Manuel Reyes-Gomez (NVIDIA)

### Imports

In [1]:
import azureml_setup
from dask_cloudprovider import AzureMLCluster

### Setup

#### Workspace

Documentation: [Workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py)

In [2]:
# Workspace handle obtained through the azureml_setup helper module. The later
# cells read it for the resource group, the compute target, the environment,
# the cluster and the datastore lookup.
ws = azureml_setup.get_workspace()

#### Parameters

In [3]:
# The single value a workshop attendee is expected to edit.
workshop_id = 1                      # REPLACE

# ----------------------------------------------------------------------
# DO NOT CHANGE ANYTHING BELOW
# Everything from here on is derived from `workshop_id` and `ws`.
# ----------------------------------------------------------------------

# Base name shared by the per-attendee resources, e.g. 'gtc20-001'
# (the id is zero-padded to three digits and the whole name lower-cased).
name = f'GTC20-{workshop_id:03d}'.lower()

# Azure ML resource names built on top of the base name.
ct_name = f'{name}-ct'
exp_name = f'{name}-dask-demo'

# Virtual network settings; the resource group is taken from the workspace.
vnet_rg = ws.resource_group
vnet_name = 'todrabas_UK_STH_VN'
subnet_name = 'default'

# Credentials: the admin login is the part of `name` before the first dash,
# and the SSH key pair is referenced by relative path.
admin_username = name.partition('-')[0]
admin_ssh_key_pub = '../../../ssh_key/gtc20_rsa.pub'
admin_ssh_key_priv = '../../../ssh_key/gtc20_rsa'

# Trust but verify: echo the derived settings so they can be checked by eye.
verify = f'''
Name: {name}

vNET RG: {vnet_rg}
vNET name: {vnet_name}
vNET subnet name: {subnet_name}

Compute target: {ct_name}
Experiment name: {exp_name}

Admin login name: {admin_username}
'''

print(verify)


Name: gtc20-001

vNET RG: azure-sandbox
vNET name: todrabas_UK_STH_VN
vNET subnet name: default

Compute target: gtc20-001-ct
Experiment name: gtc20-001-dask-demo

Admin login name: gtc20



#### Compute target
Documentation: [Compute target](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.amlcompute%28class%29?view=azure-ml-py)

In [4]:
# Obtain the compute target through the azureml_setup helper:
# STANDARD_NC6S_V3 VMs, between 0 and 2 nodes, using the vNET/subnet,
# admin login and public SSH key defined in the Parameters cell.
ct = azureml_setup.get_compute_target(
    ws,
    ct_name,
    vm_name='STANDARD_NC6S_V3',
    min_nodes=0,
    max_nodes=2,
    vnet_rg=vnet_rg,
    vnet_name=vnet_name,
    subnet_name=subnet_name,
    admin_username=admin_username,
    ssh_key_pub=admin_ssh_key_pub,
)

#### Environment
Documentation: [Environment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment?view=azure-ml-py)

In [5]:
# Environment fetched by name through the azureml_setup helper; it is later
# handed to AzureMLCluster as `environment_definition`.
env = azureml_setup.get_environment(
    ws,
    environment_name='gtc20_GPU_ENV',
)

### NYC Taxi data

In [6]:
# Data staging step, left commented out in this notebook.
# NOTE(review): presumably a one-time download/upload of the NYC Taxi data
# that has already been performed; `download_and_upload_data` is not imported
# in the visible cells, so confirm where it lives before re-enabling the call.
# download_and_upload_data(ws)

### Create Azure ML Dask cluster

In [8]:
# Start the Dask cluster on the Azure ML compute target with 2 initial nodes,
# logging in with the admin user and private SSH key from the Parameters cell.
amlcluster = AzureMLCluster(
    workspace=ws,
    compute_target=ct,
    initial_node_count=2,
    experiment_name=exp_name,
    environment_definition=env,
    admin_username=admin_username,
    admin_ssh_key=admin_ssh_key_priv,
    # Close the cluster after 1h (3600 s) if not used.
    scheduler_idle_timeout=3600,
    # Datastore looked up in the workspace under the key 'datafiles'.
    datastores=[ws.datastores['datafiles']],
)

############################## Setting up cluster ##############################
########################## Submitting the experiment ###########################
####################### Waiting for scheduler node's IP ########################
.....


########################### Scheduler: 10.7.0.7:8786 ###########################
############################# Not on the same VNET #############################
########################### Connections established ############################
############################# Scaling to 2 workers #############################
############################### Scaling is done ################################


In [9]:
# Bare expression: the notebook displays the cluster object, which renders
# as a widget (VBox) headed "AzureMLCluster".
amlcluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …

In [12]:
# Shut the cluster down when finished; the cell output reports that the
# scheduler and workers are disconnected.
amlcluster.close()

################### Scheduler and workers are disconnected. ####################
