In [1]:
import logging
import os
import tempfile

import numpy as np
import pandas as pd

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core.script_run_config import ScriptRunConfig
from azureml.core.run import Run
from azureml.data.datapath import DataPath
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.model_proxy import ModelProxy

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [16]:
# Obtain the workspace handle via Workspace.from_config().
ws = Workspace.from_config()

# Choose an experiment name.
experiment_name = 'automl-iris'
experiment = Experiment(ws, experiment_name)

# One-shot summary of the environment this notebook is running against.
# NOTE(review): the printed summary includes the subscription ID; consider
# clearing this cell's output before sharing the notebook.
output = {
    'Subscription ID': ws.subscription_id,
    'Workspace Name': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Experiment Name': experiment.name,
    'AMLS version': azureml.core.VERSION,
}

print(output)

{'Subscription ID': '4499226a-31e0-4c35-a5f4-323521d98b5b', 'Workspace Name': 'awml', 'Resource Group': 'aw', 'Location': 'eastus2', 'Experiment Name': 'automl-iris', 'AMLS version': '1.38.0'}


In [3]:
# Read the complete iris table from the local data directory. Despite its
# name, `train` holds every row; the train/validation/test partitions are
# derived from it in a later cell.
iris_csv_path = 'automl-iris-data/iris.csv'
train = pd.read_csv(iris_csv_path)

# Preview the first rows.
train.head()

Unnamed: 0,class,sepal_length,sepal_width,petal_length,petal_width
0,iris-setosa,5.1,3.5,1.4,0.2
1,iris-setosa,4.9,3.0,1.4,0.2
2,iris-setosa,4.7,3.2,1.3,0.2
3,iris-setosa,4.6,3.1,1.5,0.2
4,iris-setosa,5.0,3.6,1.4,0.2


In [5]:
# Fixed seed so the train/validation/test partitions (and therefore the CSVs
# written below and the datasets built from them) are identical on every run.
SPLIT_SEED = 42

# Hold out 20% of the rows for testing, then 25% of the remaining 80% for
# validation: a 60/20/20 train/validation/test split, each stratified on the
# 'class' column.
data_train, data_test = train_test_split(
    train, test_size=.2, stratify=train['class'], random_state=SPLIT_SEED
)
data_train, data_val = train_test_split(
    data_train, test_size=.25, stratify=data_train['class'], random_state=SPLIT_SEED
)

# The same relative path is used for the local directory and for the folder
# on the datastore.
blobstore_datadir = data_dir = 'automl-iris-data'  # Local directory to store data

# Create the local directory if needed; a no-op when it already exists.
os.makedirs(data_dir, exist_ok=True)

train_data_fname = data_dir + '/train_data.csv'
val_data_fname = data_dir + '/val_data.csv'
test_data_fname = data_dir + '/test_data.csv'

# index=False keeps the DataFrame index out of the CSVs, so it does not come
# back as an extra column when the files are read as tabular datasets.
data_train.to_csv(train_data_fname, index=False)
data_val.to_csv(val_data_fname, index=False)
data_test.to_csv(test_data_fname, index=False)

# Destination: the workspace's default datastore, under blobstore_datadir.
datastore = ws.get_default_datastore()
target = DataPath(
    datastore=datastore, path_on_datastore=blobstore_datadir, name=data_dir
)

# Upload the whole local directory, overwriting files already on the
# datastore. Everything stored in data_dir is sent, not only the three split
# CSVs. Left as the cell's last expression so the returned dataset is shown.
Dataset.File.upload_directory(
    src_dir=data_dir, target=target, overwrite=True, show_progress=True
)

Validating arguments.
Arguments validated.
Uploading file to automl-iris-data
Uploading an estimated of 6 files
Uploading automl-iris-data/iris.csv
Uploaded automl-iris-data/iris.csv, 1 files out of an estimated total of 6
Uploading automl-iris-data/val_data.csv
Uploaded automl-iris-data/val_data.csv, 2 files out of an estimated total of 6
Uploading automl-iris-data/.amlignore
Uploaded automl-iris-data/.amlignore, 3 files out of an estimated total of 6
Uploading automl-iris-data/.amlignore.amltmp
Uploaded automl-iris-data/.amlignore.amltmp, 4 files out of an estimated total of 6
Uploading automl-iris-data/test_data.csv
Uploaded automl-iris-data/test_data.csv, 5 files out of an estimated total of 6
Uploading automl-iris-data/train_data.csv
Uploaded automl-iris-data/train_data.csv, 6 files out of an estimated total of 6
Uploaded 6 files
Creating new dataset


{
  "source": [
    "('workspaceblobstore', 'automl-iris-data')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ]
}

In [6]:
# (path suffix on the datastore, registered dataset name) for each partition.
_SPLIT_SPECS = {
    'train': ('/train_data.csv', 'automl_iris_data_train'),
    'val': ('/val_data.csv', 'automl_iris_data_val'),
    'test': ('/test_data.csv', 'automl_iris_data_test'),
}


def _tabular_from_csv(csv_suffix):
    """Build a tabular dataset from one CSV under blobstore_datadir on the datastore."""
    return Dataset.Tabular.from_delimited_files(
        path=[(datastore, blobstore_datadir + csv_suffix)]
    )


def _register(dataset, registered_name):
    """Register `dataset` in the workspace; the name doubles as the description."""
    return dataset.register(
        workspace=ws,
        name=registered_name,
        description=registered_name,
        create_new_version=True,
    )


# Phase 1: create all three unregistered datasets (train, val, test order).
_unregistered = {
    split: _tabular_from_csv(csv_suffix)
    for split, (csv_suffix, _) in _SPLIT_SPECS.items()
}

# Phase 2: register them, again in train, val, test order.
_registered = {
    split: _register(_unregistered[split], registered_name)
    for split, (_, registered_name) in _SPLIT_SPECS.items()
}

train_dataset = _registered['train']
val_dataset = _registered['val']
test_dataset = _registered['test']

In [7]:
# Look up the compute target named 'automl-training' in the workspace.
# NOTE(review): presumably this cluster must already exist — nothing in this
# notebook creates it; confirm before running in a new workspace.
compute_name = 'automl-training'
compute_target = ComputeTarget(workspace=ws, name=compute_name)

In [8]:
# Extra keyword settings forwarded verbatim to AutoMLConfig.
automl_settings = dict(verbosity=logging.INFO)

# Classification on the 'class' column, scored by AUC_weighted, using the
# registered train/validation datasets on the remote compute target.
automl_config = AutoMLConfig(
    # What to learn and how to score it.
    task='classification',
    label_column_name='class',
    primary_metric='AUC_weighted',
    # Data.
    training_data=train_dataset,
    validation_data=val_dataset,
    # Where and for how long to run.
    compute_target=compute_target,
    experiment_timeout_minutes=30,
    debug_log='automl_errors.log',
    **automl_settings
)

In [9]:
# Submit the AutoML configuration to the experiment without streaming logs.
automl_run = experiment.submit(automl_config, show_output=False)

# Wait for the run to complete; the return value is bound to `_` so the cell
# does not display it.
_ = automl_run.wait_for_completion(show_output=False)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
automl-iris,AutoML_55c202c5-e213-4d92-8190-7694e0986a5b,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


In [10]:
# Fetch the metrics recorded on the parent AutoML run and display them as the
# cell output.
validation_metrics = automl_run.get_metrics()
validation_metrics

{'experiment_status': ['DatasetEvaluation',
  'FeaturesGeneration',
  'DatasetFeaturization',
  'DatasetFeaturizationCompleted',
  'ModelSelection',
  'BestRunExplainModel',
  'ModelExplanationDataSetSetup',
  'PickSurrogateModel',
  'EngineeredFeatureExplanations',
  'EngineeredFeatureExplanations',
  'RawFeaturesExplanations',
  'RawFeaturesExplanations',
  'BestRunExplainModel'],
 'experiment_status_description': ['Gathering dataset statistics.',
  'Generating features for the dataset.',
  'Beginning to fit featurizers and featurize the dataset.',
  'Completed fit featurizers and featurizing the dataset.',
  'Beginning model selection.',
  'Best run model explanations started',
  'Model explanations data setup completed',
  'Choosing LightGBM as the surrogate model for explanations',
  'Computation of engineered features started',
  'Computation of engineered features completed',
  'Computation of raw features started',
  'Computation of raw features completed',
  'Best run model ex

In [11]:
# Unpack the pair returned by get_output() and display the run object.
best_run, best_model = automl_run.get_output()
best_run



Experiment,Id,Type,Status,Details Page,Docs Page
automl-iris,AutoML_55c202c5-e213-4d92-8190-7694e0986a5b_7,,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [19]:
# Wrap the best child run in a ModelProxy bound to the same compute target,
# then run it against the registered test dataset.
# NOTE(review): the saved execution of this cell ended in KeyboardInterrupt;
# re-run it to completion before relying on `predictions` / `metrics`.
model_proxy = ModelProxy(child_run=best_run, compute_target=compute_target)
predictions, metrics = model_proxy.test(
    test_dataset,
    include_predictions_only=True,
)



KeyboardInterrupt: 