## Azure Machine Learning and Pipeline SDK-specific imports

In [4]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Report which azureml-core SDK version this kernel has loaded.
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.19.0


## Initialize Workspace
Initialize a workspace object using the _subscription_id_, _workspace_name_ and _resourcegroup_name_ values read from `config.json`, then perform _interactive authentication_.

In [5]:
# Build the Workspace handle from the local configuration file.
ws = Workspace.from_config()

# Print the identifying workspace attributes, one per line.
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

mlws-phd
ML-PHD
westeurope
4f90113f-c88e-4cd8-9b87-9f1b78abca6a


In [6]:
# Name under which the runs are grouped in the workspace, and the local
# folder later handed to AutoMLConfig as `path`.
experiment_name = 'UltrasonicVision-AutoML'
project_folder = './UltrasonicVision-AutoML'

# Handle to the experiment inside the workspace.
experiment = Experiment(workspace=ws, name=experiment_name)

# Last expression: rendered as the cell's rich output.
experiment

Name,Workspace,Report Page,Docs Page
UltrasonicVision-AutoML,mlws-phd,Link to Azure Machine Learning studio,Link to Documentation


### Create or Attach a Compute cluster
AutoML will need a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) to run.

In [7]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse or, if missing, to create.
amlcompute_cluster_name = "cpu-cluster"

try:
    # Look the cluster up by name; a ComputeTargetException is handled below
    # as "the cluster does not exist yet".
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # Provision a new cluster with up to four nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',  # for GPU, use "STANDARD_NC6"
        # vm_priority='lowpriority',  # optional
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        ws,
        amlcompute_cluster_name,
        compute_config,
    )
else:
    print('Found existing cluster, use it.')

# Wait (up to 10 minutes) for the cluster to finish provisioning.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=0,
    timeout_in_minutes=10,
)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Data

In [8]:
# Reuse the dataset registered in the workspace under `key` when present;
# otherwise build it from the remote CSV file and register it.
key = "Ultrasonic-vision-automl"
description_text = "Ultrasonic vision training dataset"
train_data_url = 'https://raw.githubusercontent.com/emanbuc/ultrasonic-vision/main/dataset/automl_train.csv'
# NOTE(review): this URL is identical to train_data_url and test_data_url is
# not used in the visible cells -- confirm whether a separate test file was intended.
test_data_url = 'https://raw.githubusercontent.com/emanbuc/ultrasonic-vision/main/dataset/automl_train.csv'

# True when a dataset with this name is already registered in the workspace.
found = key in ws.datasets

if found:
    dataset = ws.datasets[key]
else:
    # Create the tabular dataset from the CSV, then register it in the workspace.
    dataset = Dataset.Tabular.from_delimited_files(path=train_data_url)
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Materialise as a pandas DataFrame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,Time,HCSR04_001,HCSR04_002,HCSR04_003,HCSR04_004
count,52.0,52.0,52.0,52.0,52.0
mean,1609240000.0,95.676923,84.4625,90.763269,81.692692
std,0.8497626,50.480439,50.93628,49.744333,48.014654
min,1609240000.0,3.43,1.71,10.29,3.43
25%,1609240000.0,59.1625,49.3025,45.88,36.015
50%,1609240000.0,95.185,81.46,93.47,86.605
75%,1609240000.0,140.205,126.4775,137.63,120.9075
max,1609240000.0,171.5,164.64,169.79,169.79


### Review the Dataset Result

You can peek at the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records for all the steps in the TabularDataset, which makes it fast even against large datasets.

`TabularDataset` objects are composed of a list of transformation steps (optional).

In [9]:
# Preview the first five records of the dataset as a pandas DataFrame.
(
    dataset
    .take(5)
    .to_pandas_dataframe()
)

Unnamed: 0,Time,HCSR04_001,HCSR04_002,HCSR04_003,HCSR04_004,ObjectClass
0,1609239718,90.9,49.73,53.16,77.17,A
1,1609239718,12.0,8.57,18.87,92.61,A
2,1609239718,114.91,164.64,109.76,49.73,B
3,1609239718,150.92,54.88,36.01,94.33,B
4,1609239718,166.36,109.76,161.21,147.49,B


## Train


In [9]:
# Column of the training data that holds the class label to predict.
label_column = "ObjectClass"

# Settings forwarded to AutoMLConfig as extra keyword arguments.
automl_settings = dict(
    experiment_timeout_minutes=15,
    max_concurrent_iterations=4,
    primary_metric='AUC_weighted',
)

# Classification experiment configuration that runs on the remote compute target.
automl_config = AutoMLConfig(
    task="classification",
    # data
    training_data=dataset,
    label_column_name=label_column,
    featurization='auto',
    # execution
    compute_target=compute_target,
    path=project_folder,
    debug_log="automl_errors.log",
    # training behaviour
    enable_early_stopping=True,
    enable_onnx_compatible_models=True,  # needed to fetch an ONNX model from the run later
    **automl_settings,
)

In [10]:

# Submit the AutoML configuration to the experiment without streaming output.
remote_run = experiment.submit(
    config=automl_config,
    show_output=False,
)

Running on remote.


In [12]:
# Block until the submitted run has finished; the returned run details
# are displayed as the cell output.
remote_run.wait_for_completion()

{'runId': 'AutoML_d9a42d0e-1bd3-44bd-b975-960770366376',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-30T09:25:53.212698Z',
 'endTimeUtc': '2020-12-30T09:49:04.867773Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'cpu-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"cd655eed-4a1a-436a-8083-00364746fdb5\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/emanbuc/ultrasonic-vision/main/dataset/automl_train.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\\\\\": true, \\\\\\"n

In [10]:
from azureml.train.automl.run import AutoMLRun
# Re-attach to a previously submitted AutoML run by its id, so the cells
# below can be used without submitting and waiting for a new run.
remote_run = AutoMLRun(
    experiment=experiment,
    run_id='AutoML_d9a42d0e-1bd3-44bd-b975-960770366376',
)

# Last expression: rendered as the cell's rich output.
remote_run

Experiment,Id,Type,Status,Details Page,Docs Page
UltrasonicVision-AutoML,AutoML_d9a42d0e-1bd3-44bd-b975-960770366376,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [11]:
# Fetch the best child run of the AutoML run together with its fitted model.
best_run, best_model = remote_run.get_output()

In [12]:
# Show the steps of the fitted pipeline.
# (A bare `best_model` line before this one had no effect: a cell only
# displays the value of its last expression.)
best_model.steps

[('datatransformer',
  DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                  feature_sweeping_config=None, feature_sweeping_timeout=None,
                  featurization_config=None, force_text_dnn=None,
                  is_cross_validation=None, is_onnx_compatible=None, logger=None,
                  observer=None, task=None, working_dir=None)),
 ('RobustScaler',
  RobustScaler(copy=True, quantile_range=[10, 90], with_centering=False,
               with_scaling=False)),
 ('ExtraTreesClassifier',
  ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=0.01,
                       min_samples_split=0.10368421052631578,
                       min_weight_fraction_leaf=0.0, n_estimators=50,

## Examine Results

In [13]:
from azureml.widgets import RunDetails
# Render the run-details widget for the AutoML run.
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

### Retrieve the Best ONNX Model

The Open Neural Network Exchange (ONNX) is an open-source portability platform for models that allows you to convert models from one framework to another, or even to deploy models to a device (such as an iOS or Android mobile device).

In [16]:
!pip install onnxruntime-gpu



In [17]:
# Retrieve the best run together with its model in ONNX form.
# The model is only fetched here; it is written to disk in a later cell.
best_run, onnx_mdl = remote_run.get_output(return_onnx_model=True)


OSError: libcudnn.so.8: cannot open shared object file: No such file or directory

In [None]:
from azureml.automl.runtime.onnx_convert import OnnxConverter
# Destination file for the exported model, relative to the working directory.
onnx_fl_path = "./best_model.onnx"
# Write the retrieved ONNX model to that file.
OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)

In [None]:
# Clean up: delete the compute target used for training.
# NOTE(review): this also deletes the cluster when it already existed before
# this notebook ran ("Found existing cluster") -- confirm that is intended.
compute_target.delete()