In [5]:
# Print the workspace config file; the same path is later passed to
# Workspace.from_config(path=config_path, ...) in the AzureML set-up cell.
# NOTE(review): the printed output includes the subscription ID and resource
# group — consider clearing this cell's output before sharing the notebook.
!cat ../utils/config.json

{
    "subscription_id": "6560575d-fa06-4e7d-95fb-f962e74efd7a",
    "resource_group": "UW_AnomalyDetection",
    "workspace_name": "AnomalyDetection"
}

In [6]:
# Print the conda specification; the same path is later passed to
# Environment.from_conda_specification(...) in the AzureML set-up cell.
# NOTE(review): the pip list shown below contains `sklearn`, which is the
# deprecated PyPI alias; the maintained package name is `scikit-learn` —
# confirm and update environment.yml so the environment keeps building.
!cat ../utils/environment.yml

name: SampleEnv
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.7
  - pip
  - pip:
    - mlflow
    - matplotlib
    - seaborn
    - sklearn
    - azureml-mlflow
    - azureml-dataprep

In [7]:
# Print the training script; it is the file referenced by src_dir / src_name
# in the ScriptRunConfig of the AzureML set-up cell.
!cat ../src/azure_isolation_forest.py

import mlflow
import random
import argparse
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# helper functions


# main method
if __name__ == "__main__":
    # set up argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-path', type=str, help='path to the dataset')
    args = parser.parse_args()
    
    # read the data
    df = pd.read_csv(args.data_path, index_col=0)
    print(df.head())
#     # read the data
#     df = pd.read_csv('../data/kddcup.data_0_1_percent.csv', index_col=0)

#     # filter labels
#     labels = df['41']  # get labels
#     label_counts = labels.value_counts()  # n-obs / label
#     mask = label_counts > df.shape[0] * 0.05  # label must encompass > 5% of the data
#     valid_labels = label_counts.index[mask]  # get passing labels
#     print(f'Passing labels = {valid_label

In [1]:
## SET UP AZUREML DETAILS
# --- imports ---
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import (
    Workspace,
    Environment,
    Experiment,
    Dataset,
    ScriptRunConfig,
)

# --- settings: plain values only, no SDK call happens in this section ---
config_path = '../utils/config.json'
tenant_id = '72f988bf-86f1-41af-91ab-2d7cd011db47'  # printed by `az login`
env_name = 'SampleEnv'
env_path = '../utils/environment.yml'  # from `conda env export > environment.yml`
experiment_name = 'IsolationForestSample'
dataset_path = 'http://kdd.ics.uci.edu/databases/kddcup99/kddcup.data_10_percent.gz'
src_dir = '../src'
src_name = 'azure_isolation_forest.py'
compute_name = 'WorkspaceCompute'

# --- workspace: build the interactive log-in object, then link the workspace ---
interactive_auth = InteractiveLoginAuthentication(tenant_id=tenant_id)
ws = Workspace.from_config(path=config_path, auth=interactive_auth)

# --- environment: built from the conda specification file ---
env = Environment.from_conda_specification(name=env_name, file_path=env_path)

# --- experiment: named container inside the workspace ---
exp = Experiment(workspace=ws, name=experiment_name)

# --- dataset: file dataset pointing at the remote archive ---
ds = Dataset.File.from_files(dataset_path)

# --- run configuration: the script receives the mounted dataset via --data-path ---
arguments = ['--data-path', ds.as_mount()]
src = ScriptRunConfig(
    source_directory=src_dir,
    script=src_name,
    compute_target=compute_name,
    environment=env,
    arguments=arguments,
)


If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [2]:
## SUBMIT THE RUN
from azureml.widgets import RunDetails

# hand the run configuration over to the azureml platform
run = exp.submit(config=src)

# open the widget that monitors the run's steps
RunDetails(run).show()


_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [3]:
# Wait for the submitted run to finish while streaming its logs into the cell output.
# The call is the cell's last expression, so the run-details dict it returns
# (runId, target, status, ...) is displayed as the cell result.
run.wait_for_completion(show_output=True)


RunId: IsolationForestSample_1617239084_43e798cf
Web View: https://ml.azure.com/experiments/IsolationForestSample/runs/IsolationForestSample_1617239084_43e798cf?wsid=/subscriptions/6560575d-fa06-4e7d-95fb-f962e74efd7a/resourcegroups/UW_AnomalyDetection/workspaces/AnomalyDetection

Streaming azureml-logs/55_azureml-execution-tvmps_9a19018db32945838db2d68ed9900a0e31da94d13a20d7c6950cde31febb7b4c_d.txt

2021-04-01T01:10:41Z Successfully mounted a/an Blobfuse File System at /mnt/batch/tasks/shared/LS_root/jobs/anomalydetection/azureml/isolationforestsample_1617239084_43e798cf/mounts/workspaceblobstore
2021-04-01T01:10:43Z Starting output-watcher...
2021-04-01T01:10:43Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2021-04-01T01:10:43Z Executing 'Copy ACR Details file' on 10.0.0.4
2021-04-01T01:10:43Z Copy ACR Details file succeeded on 10.0.0.4. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_25e6cb78b6103af17d967d864cd0a


Execution Summary
RunId: IsolationForestSample_1617239084_43e798cf
Web View: https://ml.azure.com/experiments/IsolationForestSample/runs/IsolationForestSample_1617239084_43e798cf?wsid=/subscriptions/6560575d-fa06-4e7d-95fb-f962e74efd7a/resourcegroups/UW_AnomalyDetection/workspaces/AnomalyDetection



{'runId': 'IsolationForestSample_1617239084_43e798cf',
 'target': 'WorkspaceCompute',
 'status': 'Completed',
 'startTimeUtc': '2021-04-01T01:10:46.142735Z',
 'endTimeUtc': '2021-04-01T01:15:22.322323Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '7d19d761-f753-49bd-a31b-392c8b3a3651',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': 'd4023713-1cdf-4f6e-8670-7fd800a85d2c'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'input__109c1a88', 'mechanism': 'Mount'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'azure_isolation_forest.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--data-path', 'DatasetConsumptionConfig:input__109c1a88'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'WorkspaceCompute',
  'dataReferences': {},
  'data': {'input__109c1a88': {'da