# Accelerated Machine Learning at Scale with NVIDIA RAPIDS on Microsoft Azure ML

## Tom Drabas (Microsoft), Manuel Reyes-Gomez (NVIDIA)

### Imports

In [33]:
from azureml.core import Workspace, Experiment, Datastore, Dataset, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import MpiConfiguration
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.authentication import InteractiveLoginAuthentication
from dask_cloudprovider import AzureMLCluster

import os

### Setup

#### Workspace

Documentation: [Workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py)

In [34]:
# Coordinates of the Azure ML workspace this notebook works against.
workspace_name = 'todrabas_UK_STH'
resource_group = 'azure-sandbox'
subscription_id = '6560575d-fa06-4e7d-95fb-f962e74efd7a'

# Workspace handle; the cells below read compute targets, environments and
# the default datastore through `ws`.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

#### Parameters

In [35]:
workshop_id = 1                      # REPLACE

########################################################
### DO NOT CHANGE ANYTHING BELOW
########################################################

# Base name that every other resource name is derived from: the workshop id
# is zero-padded to three digits and the whole string is lower-cased.
name = 'GTC20-{:03d}'.format(workshop_id).lower()

### credentials
# The admin login is whatever precedes the first dash in `name`.
admin_username = name.partition('-')[0]
admin_ssh_key_priv = '../../../ssh_key/gtc20_rsa'
admin_ssh_key_pub = '../../../ssh_key/gtc20_rsa.pub'

### azure ml names
exp_name = f'{name}-dask-demo'
ct_name  = f'{name}-ct'

### vnet settings
subnet_name = 'default'
vnet_name = 'todrabas_UK_STH_VN'
vnet_rg = ws.resource_group          # the vNET resource group is taken from the workspace

### trust but verify
# Human-readable summary of the derived settings; the empty entries produce
# the leading/trailing newline and the blank separator lines.
verify = '\n'.join([
    '',
    f'Name: {name}',
    '',
    f'vNET RG: {vnet_rg}',
    f'vNET name: {vnet_name}',
    f'vNET subnet name: {subnet_name}',
    '',
    f'Compute target: {ct_name}',
    f'Experiment name: {exp_name}',
    '',
    f'Admin login name: {admin_username}',
    '',
])

print(verify)


Name: gtc20-001

vNET RG: azure-sandbox
vNET name: todrabas_UK_STH_VN
vNET subnet name: default

Compute target: gtc20-001-ct
Experiment name: gtc20-001-dask-demo

Admin login name: gtc20



#### Compute target
Documentation: [Compute target](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.compute.amlcompute(class)?view=azure-ml-py)

In [37]:
# VM size requested for the cluster nodes, and the per-node GPU count
# (the latter is only defined here, not read in this cell).
vm_name = 'STANDARD_NC6S_V3'
gpus_per_node = 1

# Load the public SSH key that is installed for the admin user on the nodes.
with open(admin_ssh_key_pub, 'r') as key_file:
    ssh_key_pub = key_file.read().strip()

if ct_name in ws.compute_targets:
    # A compute target with this name is already in the workspace: reuse it.
    ct = ws.compute_targets[ct_name]
else:
    # create config for Azure ML cluster
    # change properties as needed
    config = AmlCompute.provisioning_configuration(
        vm_size=vm_name,
        min_nodes=0,
        max_nodes=2,
        vnet_resourcegroup_name=vnet_rg,
        vnet_name=vnet_name,
        subnet_name=subnet_name,
        idle_seconds_before_scaledown=300,
        admin_username=admin_username,
        admin_user_ssh_key=ssh_key_pub,
        remote_login_port_public_access='Enabled',   ### can switch to 'Disabled' if machine submitting this runs in the same VNET
    )
    # Create the target and wait for completion, streaming progress output.
    ct = ComputeTarget.create(ws, ct_name, config)
    ct.wait_for_completion(show_output=True)

#### Environment
Documentation: [Environment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment?view=azure-ml-py)

In [38]:
# Settings for the Azure ML environment the Dask cluster will run in.
environment_name='gtc20_GPU_ENV'
docker_image='todrabas/aml_rapids:latest'
python_interpreter = '/opt/conda/envs/rapids/bin/python'
conda_packages = ['matplotlib']

if environment_name not in ws.environments:
    # No environment with this name in the workspace yet: define it on top
    # of the Docker image and point it at the interpreter inside that image.
    env = Environment(name=environment_name)
    env.docker.enabled = True
    env.docker.base_image = docker_image

    env.python.interpreter_path = python_interpreter
    env.python.user_managed_dependencies = True

    # NOTE(review): with user_managed_dependencies=True the conda packages
    # below may not actually be installed by Azure ML -- confirm whether
    # this list is still needed.
    conda_dep = CondaDependencies()

    for conda_package in conda_packages:
        conda_dep.add_conda_package(conda_package)

    env.python.conda_dependencies = conda_dep

    # Keep the object returned by register() so that `env` refers to the
    # workspace-registered environment on both branches. (The original
    # discarded the return value and assigned a misspelled `evn` instead.)
    env = env.register(workspace=ws)
else:
    # Already registered: fetch the existing definition from the workspace.
    env = ws.environments[environment_name]

### NYC Taxi data

In [41]:
# Years of NYC taxi trip data to fetch.
years = ['2016']
# When True, the following cell downloads the data and uploads it to the
# workspace's default datastore.
get_data = True

In [46]:
if get_data:
    # Helper module with the download/upload routines; it is imported only
    # when a data transfer is requested.
    import nyc_data

    download_banner = ' Downloading data '.center(80, '#')
    print(download_banner)
    print()

    # Download the requested years into the current working directory.
    nyc_data.download_nyctaxi_data(years, os.getcwd())

    upload_banner = ' Uploading data '.center(80, '#')
    print(upload_banner)
    print()

    # Upload the local 'nyctaxi' folder to 'data/nyctaxi' on the workspace's
    # default datastore.
    default_datastore = ws.get_default_datastore()
    nyctaxi_local_dir = os.path.join(os.getcwd(), 'nyctaxi')
    nyctaxi_remote_dir = os.path.join('data', 'nyctaxi')
    nyc_data.upload_nyctaxi_data(
        ws,
        default_datastore,
        nyctaxi_local_dir,
        nyctaxi_remote_dir,
    )

############################### Downloading data ###############################

--> Downloading https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-01.csv <--
#######-->> yellow_tripdata_2016-01.csv file already exists locally <<--########
--> Downloading https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-02.csv <--
#######-->> yellow_tripdata_2016-02.csv file already exists locally <<--########
--> Downloading https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-03.csv <--
#######-->> yellow_tripdata_2016-03.csv file already exists locally <<--########
--> Downloading https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-04.csv <--
#######-->> yellow_tripdata_2016-04.csv file already exists locally <<--########
--> Downloading https://storage.googleapis.com/anaconda-public-data/nyc-taxi/csv/2016/yellow_tripdata_2016-05.csv <--
####



Uploaded /Users/drabast/Documents/Programming/playground/python/GTC/SJ_2020/WORKSHOP/1_Setup/nyctaxi/2016/yellow_tripdata_2016-02.csv, 1 files out of an estimated total of 6
Uploaded /Users/drabast/Documents/Programming/playground/python/GTC/SJ_2020/WORKSHOP/1_Setup/nyctaxi/2016/yellow_tripdata_2016-01.csv, 2 files out of an estimated total of 6
Uploaded /Users/drabast/Documents/Programming/playground/python/GTC/SJ_2020/WORKSHOP/1_Setup/nyctaxi/2016/yellow_tripdata_2016-04.csv, 3 files out of an estimated total of 6
Uploaded /Users/drabast/Documents/Programming/playground/python/GTC/SJ_2020/WORKSHOP/1_Setup/nyctaxi/2016/yellow_tripdata_2016-03.csv, 4 files out of an estimated total of 6
Uploaded /Users/drabast/Documents/Programming/playground/python/GTC/SJ_2020/WORKSHOP/1_Setup/nyctaxi/2016/yellow_tripdata_2016-06.csv, 5 files out of an estimated total of 6
Uploaded /Users/drabast/Documents/Programming/playground/python/GTC/SJ_2020/WORKSHOP/1_Setup/nyctaxi/2016/yellow_tripdata_2016-05.

### Create Azure ML Dask cluster

In [7]:
# Start a Dask cluster on the Azure ML compute target, using the environment
# and credentials prepared in the Setup section.
# `n_gpus_per_node` now follows `gpus_per_node` from the compute-target cell
# instead of repeating the literal 1, so the VM choice and the cluster's GPU
# count are configured in one place.
amlcluster = AzureMLCluster(
              workspace=ws
            , compute_target=ct
            , initial_node_count=1
            , experiment_name=exp_name
            , environment_definition=env
            , use_gpu=True
            , n_gpus_per_node=gpus_per_node
            , admin_username=admin_username
            , admin_ssh_key=admin_ssh_key_priv   # path to the private key file
            , asynchronous=False
        )

############################## Setting up cluster ##############################
########################## Submitting the experiment ###########################
####################### Waiting for scheduler node's IP ########################
........................................................................................


########################### Scheduler: 10.7.0.5:8786 ###########################
Checking connection...
############################# Not on the same VNET #############################
########################### Connections established ############################
############################# Scaling to 1 workers #############################
############################### Scaling is done ################################


In [8]:
# Display the cluster object; as the cell's last expression it is rendered
# through its rich (widget) representation.
amlcluster

VBox(children=(HTML(value='<h2>AzureMLCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n  …

In [9]:
# Close the cluster once the demo is finished; the captured output reports
# the scheduler and workers as disconnected.
amlcluster.close()

################### Scheduler and workers are disconnected. ####################
