# RAPIDS Taxi demo on AzureML

References:
 - https://github.com/drabastomek/MLADS_RAPIDS/blob/master/notebook/1_pandasVsRapids_ETL.ipynb
 - https://github.com/danielsc/azureml-and-dask/blob/master/LoadDataFromDatastore.ipynb
 - https://github.com/danielsc/azureml-and-dask/blob/master/StartDask.ipynb

In [2]:
import os
import json
import time

from azureml.core import Workspace, Experiment, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.data.data_reference import DataReference
from azureml.core.runconfig import RunConfiguration, MpiConfiguration
from azureml.core import ScriptRunConfig
from azureml.train.estimator import Estimator

In [3]:
# Read the workspace coordinates from the local JSON config file.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Name of the GPU compute cluster this notebook works with.
gpu_cluster_name = "kdd-gpu-cluster"

# Pull out the three identifiers needed to address the workspace.
subscription_id = config["subscription_id"]
resource_group = config["resource_group"]
workspace_name = config["workspace_name"]

# Handle to the AzureML workspace, built from the identifiers above.
ws = Workspace(
    workspace_name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

## Build cluster

In [4]:
# Reuse the cluster when the workspace already has one with this name, so
# that re-running this cell (e.g. after a kernel restart) does not attempt
# to create the same compute target a second time.
existing_targets = ws.compute_targets
if gpu_cluster_name in existing_targets:
    print("Found existing cluster, reusing it")
    gpu_cluster = existing_targets[gpu_cluster_name]
else:
    print("Creating new cluster")
    # min_nodes and max_nodes are both 1: a single-node cluster is requested.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_ND12s", min_nodes=1, max_nodes=1)
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)

# Wait in both cases: a reused cluster may still be provisioning or resizing.
print("waiting for nodes")
gpu_cluster.wait_for_completion(show_output=True)

Creating new cluster
waiting for nodes
Creating
Succeeded..............
AmlCompute wait for completion finished
Minimum number of nodes requested have been provisioned


## Test a simple Dask run

In [15]:
# MPI launch settings: two processes per node.
mpi_config = MpiConfiguration()
mpi_config.process_count_per_node = 2

# Execution environment named 'rapids', backed by a Docker image with GPU
# support enabled.
env = Environment('rapids')
env.docker.enabled = True
env.docker.gpu_support = True
env.docker.base_image = 'jacobtomlinson/azureml_rapids:latest'
# Python dependencies are flagged as user-managed and the interpreter is
# taken from the given path (presumably a conda env shipped in the image).
env.python.user_managed_dependencies = True
env.python.interpreter_path = '/conda/envs/rapids/bin/python'
env.spark.precache_packages = False

# Collect the estimator arguments first, then build the estimator from them.
estimator_kwargs = dict(
    source_directory='./dask',
    compute_target=gpu_cluster,
    entry_script='startDask.py',
    # The workspace's default datastore is handed to the script as --data.
    script_params={'--data': ws.get_default_datastore()},
    node_count=1,
    distributed_training=mpi_config,
    environment_definition=env,
)
est = Estimator(**estimator_kwargs)

# Submit the estimator under the 'dask' experiment and keep the run handle.
experiment = Experiment(ws, 'dask')
run = experiment.submit(est)

In [23]:
# Poll the run until it has logged a 'headnode' metric (the scheduler IP).
#
# The loop gives up with a RuntimeError as soon as the run reaches a state
# in which the metric can no longer be expected. Checking only 'Failed'
# would leave the loop spinning forever on a canceled or already finished
# run.
finished_statuses = ('Failed', 'Canceled', 'Completed')

metrics = run.get_metrics()
while 'headnode' not in metrics:
    status = run.get_status()
    if status in finished_statuses:
        raise RuntimeError("Experiment failed to start. Run status: " + str(status))
    print("waiting for scheduler node's ip")
    time.sleep(30)
    # Refresh once per iteration; the same snapshot is reused for the print
    # below instead of querying the service again.
    metrics = run.get_metrics()

print('Headnode has IP:', metrics['headnode'])

RuntimeError: Experiment failed to start.

In [28]:
# Cancel the run currently held in `run`.
run.cancel()

## Delete cluster

In [None]:
# Request deletion of the GPU cluster, then wait for the operation to
# finish while printing progress (show_output=True).
gpu_cluster.delete()
gpu_cluster.wait_for_completion(show_output=True)

Deleting