In [1]:
import os
from azureml.core import Workspace, Experiment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.data.data_reference import DataReference
from azureml.core.runconfig import RunConfiguration
from azureml.core import ScriptRunConfig
import json

In [2]:
# Read the workspace settings from the local JSON config file.
with open('config/config.json', 'r') as f:
    config = json.load(f)

# Individual settings used to locate the workspace and the GPU cluster.
subscription_id = config["SUBSCRIPTION_ID"]
resource_group = config["RESOURCE_GROUP"]
workspace_name = config["WORKSPACE_NAME"]
gpu_cluster_name = config["GPU_CLUSTER_NAME"]

# Handle to the Azure ML workspace identified by the settings above.
ws = Workspace(
    workspace_name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

In [3]:
# Folder holding the scripts that get submitted as runs.
scripts_folder = "scripts"

# Look the target up once; a missing name yields None and triggers creation.
gpu_cluster = ws.compute_targets.get(gpu_cluster_name)

if gpu_cluster is None:
    print("Creating new cluster")

    # vm_size parameter below could be modified to one of the RAPIDS-supported VM types.
    # NOTE(review): min_nodes=1 presumably keeps one node allocated while idle — confirm
    # this is intended for cost reasons.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_NC6s_v2",
        min_nodes=1,
        max_nodes=1,
    )

    # Create the cluster and block until provisioning has finished.
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)
    gpu_cluster.wait_for_completion(show_output=True)
elif isinstance(gpu_cluster, AmlCompute):
    print('Compute target found. Using: ' + gpu_cluster_name)
else:
    # The name is already taken by a different kind of compute target. Report it
    # instead of continuing silently, so a later submission failure is explainable.
    print('WARNING: compute target ' + gpu_cluster_name + ' exists but is of type '
          + type(gpu_cluster).__name__ + ', not AmlCompute')

Compute target found. Using: gpu-todrabas


In [4]:
# Path on the datastore under which the data lives.
file_root = 'mortgage_np'

# Default datastore attached to the workspace.
ds = ws.get_default_datastore()

# Reference to the data (already uploaded to the datastore), exposed to runs
# under the name 'data'.
data_ref = DataReference(
    datastore=ds,
    data_reference_name='data',
    path_on_datastore=file_root,
)

In [5]:
run_config = RunConfiguration()

# Where and how the script is executed.
run_config.target = gpu_cluster_name
run_config.framework = 'python'

# Python: use the interpreter at the given path and let the user manage
# the dependencies.
run_config.environment.python.interpreter_path = '/conda/envs/rapids/bin/python'
run_config.environment.python.user_managed_dependencies = True

# Docker: run in a GPU-enabled container based on the image below.
# Alternative base image: "rapidsai/rapidsai:cuda9.2-runtime-ubuntu18.04"
run_config.environment.docker.base_image = "todrabas/mlads_rapids:cuda9.2-runtime-ubuntu18.04"
run_config.environment.docker.gpu_support = True
run_config.environment.docker.enabled = True

# Spark package pre-caching is switched off.
run_config.environment.spark.precache_packages = False

# Make the datastore reference available to the run under the key 'data'.
run_config.data_references = {'data': data_ref.to_config()}

In [9]:
# GPU variant of the DBSCAN script: '--gpu 1', data location taken from the
# datastore reference, 256 columns.
src = ScriptRunConfig(
    source_directory=scripts_folder,
    script='2_pandasVsRapids_DBSCAN.py',
    arguments=[
        '--gpu', 1,
        '--data_dir', str(data_ref),
        '--ncols', 256,
    ],
    run_config=run_config,
)

# Submit under the GPU experiment and stream the logs until the run finishes.
exp = Experiment(workspace=ws, name='rapidstest_dbscan_gpu')
run = exp.submit(config=src)
run.wait_for_completion(show_output=True)

RunId: rapidstest_dbscan_gpu_1559597652_94334083
Web View: https://mlworkspace.azure.ai/portal/subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourceGroups/MLADS_todrabas/providers/Microsoft.MachineLearningServices/workspaces/todrabas_MLADS_WE/experiments/rapidstest_dbscan_gpu/runs/rapidstest_dbscan_gpu_1559597652_94334083

Streaming azureml-logs/80_driver_log.txt

Running DBSCAN on GPU...
/mnt/batch/tasks/shared/LS_root/jobs/todrabas_mlads_we/azureml/rapidstest_dbscan_gpu_1559597652_94334083/mounts/workspaceblobstore/mortgage_np/mortgage.csv
Total DBSCAN Time on GPU: 0:00:02.019758


The experiment completed successfully. Finalizing run...
Logging experiment finalizing status in history service.
Cleaning up all outstanding Run operations, waiting 300.0 seconds
2 items cleaning up...
Cleanup took 0.25251340866088867 seconds

Execution Summary
RunId: rapidstest_dbscan_gpu_1559597652_94334083
Web View: https://mlworkspace.azure.ai/portal/subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b

{'runId': 'rapidstest_dbscan_gpu_1559597652_94334083',
 'target': 'gpu-todrabas',
 'status': 'Completed',
 'startTimeUtc': '2019-06-03T21:34:24.339979Z',
 'endTimeUtc': '2019-06-03T21:34:49.332287Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': 'ec1b4558-e40f-43a9-892d-ce42f18e8b75',
  'azureml.git.repository_uri': 'git@github.com:drabastomek/MLADS_RAPIDS.git',
  'mlflow.source.git.repoURL': 'git@github.com:drabastomek/MLADS_RAPIDS.git',
  'azureml.git.branch': 'devel',
  'mlflow.source.git.branch': 'devel',
  'azureml.git.commit': 'ba3ab5b273cbdf8a5bcd3345a4a043542de4442c',
  'mlflow.source.git.commit': 'ba3ab5b273cbdf8a5bcd3345a4a043542de4442c',
  'azureml.git.dirty': 'True'},
 'runDefinition': {'script': '2_pandasVsRapids_DBSCAN.py',
  'arguments': ['--gpu',
   '1',
   '--data_dir',
   '$AZUREML_DATAREFERENCE_data',
   '--ncols',
   '256'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'gpu-todrabas',


In [18]:
# CPU variant of the DBSCAN script: '--gpu 0', data location taken from the
# datastore reference, 256 columns.
src = ScriptRunConfig(
    source_directory=scripts_folder,
    script='2_pandasVsRapids_DBSCAN.py',
    arguments=[
        '--gpu', 0,
        '--data_dir', str(data_ref),
        '--ncols', 256,
    ],
    run_config=run_config,
)

# Submit under the CPU experiment and stream the logs until the run finishes.
exp = Experiment(workspace=ws, name='rapidstest_dbscan_cpu')
run = exp.submit(config=src)
run.wait_for_completion(show_output=True)

RunId: rapidstest_dbscan_cpu_1559596009_2432f4fb
Web View: https://mlworkspace.azure.ai/portal/subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourceGroups/MLADS_todrabas/providers/Microsoft.MachineLearningServices/workspaces/todrabas_MLADS_WE/experiments/rapidstest_dbscan_cpu/runs/rapidstest_dbscan_cpu_1559596009_2432f4fb

Streaming azureml-logs/80_driver_log.txt

Running DBSCAN on CPU...
/mnt/batch/tasks/shared/LS_root/jobs/todrabas_mlads_we/azureml/rapidstest_dbscan_cpu_1559596009_2432f4fb/mounts/workspaceblobstore/mortgage_np/mortgage.csv
Total DBSCAN Time on CPU: 0:01:38.770212


The experiment completed successfully. Finalizing run...
Logging experiment finalizing status in history service.

Execution Summary
RunId: rapidstest_dbscan_cpu_1559596009_2432f4fb
Web View: https://mlworkspace.azure.ai/portal/subscriptions/15ae9cb6-95c1-483d-a0e3-b1a1a3b06324/resourceGroups/MLADS_todrabas/providers/Microsoft.MachineLearningServices/workspaces/todrabas_MLADS_WE/experiments/rapidstes

{'runId': 'rapidstest_dbscan_cpu_1559596009_2432f4fb',
 'target': 'gpu-todrabas',
 'status': 'Completed',
 'startTimeUtc': '2019-06-03T21:06:59.849828Z',
 'endTimeUtc': '2019-06-03T21:08:57.465319Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': '0d743ad6-71a4-4bbc-b70c-d8bc06c719fe',
  'azureml.git.repository_uri': 'git@github.com:drabastomek/MLADS_RAPIDS.git',
  'mlflow.source.git.repoURL': 'git@github.com:drabastomek/MLADS_RAPIDS.git',
  'azureml.git.branch': 'devel',
  'mlflow.source.git.branch': 'devel',
  'azureml.git.commit': 'ba3ab5b273cbdf8a5bcd3345a4a043542de4442c',
  'mlflow.source.git.commit': 'ba3ab5b273cbdf8a5bcd3345a4a043542de4442c',
  'azureml.git.dirty': 'True'},
 'runDefinition': {'script': '2_pandasVsRapids_DBSCAN.py',
  'arguments': ['--gpu',
   '0',
   '--data_dir',
   '$AZUREML_DATAREFERENCE_data',
   '--ncols',
   '256'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'gpu-todrabas',
