In [1]:
## SET UP AZUREML DETAILS
# imports
import os

from azureml.core import Workspace, Environment, Experiment, Dataset, ScriptRunConfig
from azureml.core.authentication import InteractiveLoginAuthentication

# set up workspace
# - the tenant id is printed by `az login`; export AZURE_TENANT_ID to target a
#   different tenant without editing this cell (falls back to the original value)
config_path = '../../utils/config_GPU.json'
tenant_id = os.environ.get('AZURE_TENANT_ID', '72f988bf-86f1-41af-91ab-2d7cd011db47')
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)  # interactive log-in object for that tenant
ws = Workspace.from_config(path=config_path, auth=interactive_auth)  # load the workspace described by the config file

# set up environment
# - the conda spec referenced below can be generated with
#   `conda env export > environment.yml`
env_name = 'SampleEnv'
env_path = '../../utils/environment_case_study_cuml.yml'
env = Environment.from_conda_specification(env_name, env_path)
# - run the job inside Docker, on top of the base image named below
#   (per its tag: OpenMPI 4.1.0, CUDA 11.0.3, cuDNN 8, Ubuntu 18.04)
# NOTE(review): newer azureml-core releases may flag `env.docker.enabled` as
# deprecated in favour of a DockerConfiguration on the run config - confirm
# against the installed SDK version.
env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04'
env.docker.enabled = True

# set up experiment
# - runs submitted later in the notebook are grouped under this experiment name
experiment_name = 'AnomalyDetection'
exp = Experiment(ws, experiment_name)

# set up dataset
# - file dataset that points at the gzipped 10% KDD Cup 99 file by URL
dataset_path = 'http://kdd.ics.uci.edu/databases/kddcup99/kddcup.data_10_percent.gz'
ds = Dataset.File.from_files(path=dataset_path)

# set up run
# - the training script in `src_dir` receives the mounted dataset through
#   its `--data-path` argument and runs on the named compute target
src_dir = '../../src/case_study_cuml'
src_name = 'azure_cuml_case_study.py'
compute_name = 'gpu-compute-one'
arguments = ['--data-path', ds.as_mount()]
src = ScriptRunConfig(
    source_directory=src_dir,
    script=src_name,
    arguments=arguments,
    compute_target=compute_name,
    environment=env,
)


If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [2]:
## SUBMIT THE RUN
from azureml.widgets import RunDetails

# hand the run configuration to the experiment; this starts the run on the azureml platform
run = exp.submit(config=src)

# display the widget that tracks the run's progress
run_monitor = RunDetails(run)
run_monitor.show()


_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [3]:
# show the outputs
# - waits for the submitted run to finish while streaming its logs into the
#   cell output; the call's return value (the run details dict) is displayed
#   as the cell result because it is the last expression
run.wait_for_completion(show_output=True)


RunId: AnomalyDetection_1622692392_97cac9a1
Web View: https://ml.azure.com/experiments/AnomalyDetection/runs/AnomalyDetection_1622692392_97cac9a1?wsid=/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/AzureML_UW_NLP/workspaces/AZ_ML

Streaming azureml-logs/55_azureml-execution-tvmps_076fac6110b3404e7366ab9a99661e18ea475b1477d539123458be6f66aa57e7_p.txt

2021-06-03T03:53:26Z Successfully mounted a/an Blobfuse File System at /mnt/batch/tasks/shared/LS_root/jobs/az_ml/azureml/anomalydetection_1622692392_97cac9a1/mounts/workspaceblobstore
2021-06-03T03:53:27Z Failed to start nvidia-fabricmanager due to exit status 5 with output Failed to start nvidia-fabricmanager.service: Unit nvidia-fabricmanager.service not found.
. Please ignore this if the GPUs don't utilize NVIDIA® NVLink® switches.
2021-06-03T03:53:27Z Starting output-watcher...
2021-06-03T03:53:27Z IsDedicatedCompute == False, starting polling for Low-Pri Preemption
2021-06-03T03:53:27Z Executing 'Copy ACR Details 


Streaming azureml-logs/75_job_post-tvmps_076fac6110b3404e7366ab9a99661e18ea475b1477d539123458be6f66aa57e7_p.txt

[2021-06-03T03:54:04.038541] Entering job release
[2021-06-03T03:54:04.679549] Starting job release
[2021-06-03T03:54:04.679903] Logging experiment finalizing status in history service.[2021-06-03T03:54:04.680022] job release stage : upload_datastore starting...
[2021-06-03T03:54:04.680163] job release stage : start importing azureml.history._tracking in run_history_release.Starting the daemon thread to refresh tokens in background for process with pid = 298

[2021-06-03T03:54:04.680580] job release stage : copy_batchai_cached_logs starting...
[2021-06-03T03:54:04.680713] job release stage : execute_job_release starting...

[2021-06-03T03:54:04.682652] job release stage : copy_batchai_cached_logs completed...
[2021-06-03T03:54:04.731721] Entering context manager injector.
[2021-06-03T03:54:04.733048] job release stage : upload_datastore completed...
[2021-06-03T03:54:04.790

{'runId': 'AnomalyDetection_1622692392_97cac9a1',
 'target': 'gpu-compute-one',
 'status': 'Completed',
 'startTimeUtc': '2021-06-03T03:53:25.253066Z',
 'endTimeUtc': '2021-06-03T03:54:16.118181Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'b13105cc-8af8-4be5-8bba-ef69bb0117cc',
  'azureml.git.repository_uri': 'https://github.com/danielgchen/MS_AZML_Anomaly_Detection.git',
  'mlflow.source.git.repoURL': 'https://github.com/danielgchen/MS_AZML_Anomaly_Detection.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': '6fd03eb518a0d0b1d18bbece312c7aa8cdd8fd09',
  'mlflow.source.git.commit': '6fd03eb518a0d0b1d18bbece312c7aa8cdd8fd09',
  'azureml.git.dirty': 'True',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': 'fb35714e-4e3e-4096-8b08-6ce08fc320c1'}, 'consumptionDetails': {'type': 'RunInput', 'inp