In [22]:
import logging

from matplotlib import pyplot as plt
import pandas as pd

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.interpret import ExplanationClient

In [23]:
from azureml.core.experiment import Experiment

ws = Workspace.from_config()

In [25]:
from azureml.core import Dataset
from azureml.data.dataset_factory import DataType

# create a TabularDataset from a delimited file behind a public web url and convert column "Survived" to boolean
web_path ='https://dprepdata.blob.core.windows.net/demo/Titanic.csv'
titanic_ds = Dataset.Tabular.from_delimited_files(path=web_path, set_column_types={'Survived': DataType.to_bool()})

# preview the first 3 rows of titanic_ds
titanic_ds.take(3).to_pandas_dataframe()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,False,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,True,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,True,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [26]:
titanic_ds

{
  "source": [
    "https://dprepdata.blob.core.windows.net/demo/Titanic.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes",
    "SetColumnTypes"
  ]
}

In [27]:
from azureml.train.automl import AutoMLConfig

# task can be one of classification, regression, forecasting
automl_config = AutoMLConfig(task = "classification")

In [28]:
from azureml.core.dataset import Dataset
data = "https://dprepdata.blob.core.windows.net/demo/Titanic.csv"
dataset = Dataset.Tabular.from_delimited_files(data)

In [29]:
label = "Survived"

In [30]:
validation_data = "https://dprepdata.blob.core.windows.net/demo/Titanic.csv"
validation_dataset = Dataset.Tabular.from_delimited_files(validation_data)

In [31]:
test_data = "https://dprepdata.blob.core.windows.net/demo/Titanic.csv"
test_dataset = Dataset.Tabular.from_delimited_files(test_data)

In [32]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Choose a name for your CPU cluster
cpu_cluster_name = "cpu-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=6)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [33]:
automl_settings = {
    "experiment_timeout_hours" : 0.3,
    "enable_early_stopping" : True,
    "iteration_timeout_minutes": 5,
    "max_concurrent_iterations": 4,
    "max_cores_per_iteration": -1,
    #"n_cross_validations": 2,
    "primary_metric": 'AUC_weighted',
    "featurization": 'auto',
    "verbosity": logging.INFO,
}

automl_config = AutoMLConfig(task = 'classification',
                             debug_log = 'automl_errors.log',
                             compute_target=compute_target,
                             experiment_exit_score = 0.9984,
                             blocked_models = ['KNN','LinearSVM'],
                             enable_onnx_compatible_models=True,
                             training_data = titanic_ds,
                             label_column_name = label,
                             validation_data = validation_dataset,
                             **automl_settings
                            )

In [34]:
from azureml.core.experiment import Experiment

ws = Workspace.from_config()

# Choose a name for the experiment and specify the project folder.
experiment_name = 'Titanic-automl'
project_folder = './titanic/automl-classification'

experiment = Experiment(ws, experiment_name)

In [35]:
run = experiment.submit(automl_config, show_output=True)



Submitting remote run.
No run_configuration provided, running on cpu-cluster with default configuration
Running on remote compute: cpu-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
Titanic-automl,AutoML_abac9517-7f1e-457b-afe7-ca47c29704bd,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       DONE
DESCRIPTION:  If the missing values are expected, let the run complete. Otherwise cancel the current run and use a script to customize the handling of missing feature values that may be more appropriate based on the data type and business requirement.
              Learn more about missing value imputation: https://aka.ms/Automated

In [36]:
run.wait_for_completion()

{'runId': 'AutoML_abac9517-7f1e-457b-afe7-ca47c29704bd',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-05-07T14:12:48.26966Z',
 'endTimeUtc': '2021-05-07T14:31:36.923051Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'cpu-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"1955c9dc-2da9-4895-886e-449aa1dde29a\\"}, \\"validation_data\\": {\\"datasetId\\": \\"29f6a0b9-dcbf-44e2-bb6f-18b27ada4eff\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.26.0", "azureml-train": "1.26.0", "azureml-train-restclients-hyperdrive": "1.26.0", "azureml-train-core": "1.26.0", "azureml-train-automl-client": 

In [37]:
best_run_customized, fitted_model_customized = run.get_output()

Package:azureml-core, training version:1.27.0, current version:1.26.0
Package:azureml-dataprep, training version:2.14.2, current version:2.13.2
Package:azureml-dataprep-native, training version:33.0.0, current version:32.0.0
Package:azureml-dataprep-rslex, training version:1.12.1, current version:1.11.2
Package:azureml-dataset-runtime, training version:1.27.0, current version:1.26.0
Package:azureml-defaults, training version:1.27.0, current version:1.26.0
Package:azureml-interpret, training version:1.27.0, current version:1.26.0
Package:azureml-mlflow, training version:1.27.0, current version:1.26.0
Package:azureml-pipeline-core, training version:1.27.0, current version:1.26.0
Package:azureml-telemetry, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-client, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-runtime, training version:1.27.0.post1


In [38]:
custom_featurizer = fitted_model_customized.named_steps['datatransformer']
df = custom_featurizer.get_featurization_summary()
pd.DataFrame(data=df)

AttributeError: 'NoneType' object has no attribute 'named_steps'

In [39]:
df = custom_featurizer.get_featurization_summary(is_user_friendly=False)
pd.DataFrame(data=df)

NameError: name 'custom_featurizer' is not defined

In [None]:
df = custom_featurizer.get_stats_feature_type_summary()
pd.DataFrame(data=df)

In [41]:
from azureml.widgets import RunDetails
RunDetails(run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…



In [42]:
# Wait for the best model explanation run to complete
from azureml.core.run import Run
model_explainability_run_id = run.id + "_" + "ModelExplain"
print(model_explainability_run_id)
model_explainability_run = Run(experiment=experiment, run_id=model_explainability_run_id)
model_explainability_run.wait_for_completion()

# Get the best run object
best_run, fitted_model = run.get_output()

AutoML_abac9517-7f1e-457b-afe7-ca47c29704bd_ModelExplain


Package:azureml-core, training version:1.27.0, current version:1.26.0
Package:azureml-dataprep, training version:2.14.2, current version:2.13.2
Package:azureml-dataprep-native, training version:33.0.0, current version:32.0.0
Package:azureml-dataprep-rslex, training version:1.12.1, current version:1.11.2
Package:azureml-dataset-runtime, training version:1.27.0, current version:1.26.0
Package:azureml-defaults, training version:1.27.0, current version:1.26.0
Package:azureml-interpret, training version:1.27.0, current version:1.26.0
Package:azureml-mlflow, training version:1.27.0, current version:1.26.0
Package:azureml-pipeline-core, training version:1.27.0, current version:1.26.0
Package:azureml-telemetry, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-client, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-runtime, training version:1.27.0.post1


In [43]:
client = ExplanationClient.from_run(best_run)
engineered_explanations = client.download_model_explanation(raw=False)
exp_data = engineered_explanations.get_feature_importance_dict()
exp_data

{'Name_WordGramTfIdf_van Melkebeke': 0.0,
 'Name_WordGramTfIdf_Harald': 0.0,
 'Name_WordGramTfIdf_Hansen': 0.0,
 'Name_WordGramTfIdf_Hans Peder': 0.0,
 'Name_WordGramTfIdf_Hans Martin': 0.0,
 'Name_WordGramTfIdf_Hans Linus': 0.0,
 'Name_WordGramTfIdf_Hans Kristensen': 0.0,
 'Name_WordGramTfIdf_Hans': 0.0,
 'Name_WordGramTfIdf_Hanora Norah': 0.0,
 'Name_WordGramTfIdf_Hanora Nora': 0.0,
 'Name_WordGramTfIdf_Hanora': 0.0,
 'Name_WordGramTfIdf_Hanne Youssef': 0.0,
 'Name_WordGramTfIdf_Hanne': 0.0,
 'Name_WordGramTfIdf_Hannah Wizosky': 0.0,
 'Name_WordGramTfIdf_Hannah Godfrey': 0.0,
 'Name_WordGramTfIdf_Hannah': 0.0,
 'Name_WordGramTfIdf_Hanna Mr': 0.0,
 'Name_WordGramTfIdf_Hanna Assi': 0.0,
 'Name_WordGramTfIdf_Hanna': 0.0,
 'Name_WordGramTfIdf_Hampe Mr': 0.0,
 'Name_WordGramTfIdf_Hampe': 0.0,
 'Name_WordGramTfIdf_Hammad': 0.0,
 'Name_WordGramTfIdf_Hamalainen Mrs': 0.0,
 'Name_WordGramTfIdf_Hamalainen Master': 0.0,
 'Name_WordGramTfIdf_Hamalainen': 0.0,
 'Name_WordGramTfIdf_Halvorsen': 0.0



In [44]:
client = ExplanationClient.from_run(best_run)
engineered_explanations = client.download_model_explanation(raw=True)
exp_data = engineered_explanations.get_feature_importance_dict()
exp_data

{'Embarked': 0.0,
 'Cabin': 0.0,
 'Fare': 0.0,
 'Ticket': 0.0,
 'Parch': 0.0,
 'SibSp': 0.0,
 'Age': 0.0,
 'Sex': 0.0,
 'Name': 0.0,
 'Pclass': 0.0,
 'PassengerId': 0.0}

In [45]:
best_run, onnx_mdl = run.get_output(return_onnx_model=True)



ConfigException: ConfigException:
	Message: Module 'azureml-train-automl-runtime' is required in the current environment for running Remote or Local (in-process) runs. Please install this dependency (e.g. `pip install azureml-train-automl-runtime`) or provide a RunConfiguration.
	InnerException: None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "Module 'azureml-train-automl-runtime' is required in the current environment for running Remote or Local (in-process) runs. Please install this dependency (e.g. `pip install azureml-train-automl-runtime`) or provide a RunConfiguration.",
        "target": "onnx-model",
        "inner_error": {
            "code": "NotSupported",
            "inner_error": {
                "code": "IncompatibleOrMissingDependency"
            }
        }
    }
}

In [47]:
from azureml.automl.runtime.onnx_convert import OnnxConverter
onnx_fl_path = "./best_model.onnx"
OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)

ModuleNotFoundError: No module named 'azureml.automl.runtime'

In [46]:
import sys
import json
from azureml.automl.core.onnx_convert import OnnxConvertConstants
from azureml.train.automl import constants

if sys.version_info < OnnxConvertConstants.OnnxIncompatiblePythonVersion:
    python_version_compatible = True
else:
    python_version_compatible = False

import onnxruntime
from azureml.automl.runtime.onnx_convert import OnnxInferenceHelper

def get_onnx_res(run):
    res_path = 'onnx_resource.json'
    run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX, output_file_path=res_path)
    with open(res_path) as f:
        onnx_res = json.load(f)
    return onnx_res

if python_version_compatible:
    test_df = test_dataset.to_pandas_dataframe()
    mdl_bytes = onnx_mdl.SerializeToString()
    onnx_res = get_onnx_res(best_run)

    onnxrt_helper = OnnxInferenceHelper(mdl_bytes, onnx_res)
    pred_onnx, pred_prob_onnx = onnxrt_helper.predict(test_df)

    print(pred_onnx)
    print(pred_prob_onnx)
else:
    print('Please use Python version 3.6 or 3.7 to run the inference helper.')

ModuleNotFoundError: No module named 'onnxruntime'



In [48]:
best_run, fitted_model = run.get_output()

Package:azureml-core, training version:1.27.0, current version:1.26.0
Package:azureml-dataprep, training version:2.14.2, current version:2.13.2
Package:azureml-dataprep-native, training version:33.0.0, current version:32.0.0
Package:azureml-dataprep-rslex, training version:1.12.1, current version:1.11.2
Package:azureml-dataset-runtime, training version:1.27.0, current version:1.26.0
Package:azureml-defaults, training version:1.27.0, current version:1.26.0
Package:azureml-interpret, training version:1.27.0, current version:1.26.0
Package:azureml-mlflow, training version:1.27.0, current version:1.26.0
Package:azureml-pipeline-core, training version:1.27.0, current version:1.26.0
Package:azureml-telemetry, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-client, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-runtime, training version:1.27.0.post1


In [49]:
model_name = best_run.properties['model_name']

script_file_name = 'inference/score.py'

best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'inference/score.py')

In [52]:
description = 'AutoML Model trained on bank marketing data to predict if a client will subscribe to a term deposit'
tags = None
model = run.register_model(model_name = model_name, description = description, tags = tags)

print(run.model_id) # This will be written to the script file later in the notebook.

AutoMLabac951773


In [53]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

inference_config = InferenceConfig(entry_script=script_file_name)

aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1, 
                                               tags = {'area': "bmData", 'type': "automl_classification"}, 
                                               description = 'sample service for Automl Classification')

aci_service_name = 'automl-titanic'
print(aci_service_name)
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
aci_service.wait_for_deployment(True)
print(aci_service.state)

automl-sample-bankmarketing-all
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-07 14:52:41+00:00 Creating Container Registry if not exists.
2021-05-07 14:52:42+00:00 Use the existing image.
2021-05-07 14:52:42+00:00 Generating deployment configuration.
2021-05-07 14:52:43+00:00 Submitting deployment to compute..
2021-05-07 14:52:47+00:00 Checking the status of deployment automl-sample-bankmarketing-all.



.
2021-05-07 14:55:57+00:00 Checking the status of inference endpoint automl-sample-bankmarketing-all.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [None]:
#aci_service.get_logs()

In [54]:
# Load the bank marketing datasets.
from numpy import array

In [55]:

X_test = test_dataset.drop_columns(columns=['Survived'])
y_test = test_dataset.keep_columns(columns=['Survived'], validate=True)
test_dataset.take(5).to_pandas_dataframe()



Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [56]:
X_test = X_test.to_pandas_dataframe()
y_test = y_test.to_pandas_dataframe()

In [57]:
import json
import requests

X_test_json = X_test.to_json(orient='records')
data = "{\"data\": " + X_test_json +"}"
headers = {'Content-Type': 'application/json'}

resp = requests.post(aci_service.scoring_uri, data, headers=headers)

y_pred = json.loads(json.loads(resp.text))['result']

In [58]:
actual = array(y_test)
actual = actual[:,0]
print(len(y_pred), " ", len(actual))

891   891


In [60]:

%matplotlib notebook
from sklearn.metrics import confusion_matrix
import numpy as np
import itertools

cf =confusion_matrix(actual,y_pred)
plt.imshow(cf,cmap=plt.cm.Blues,interpolation='nearest')
plt.colorbar()
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
class_labels = ['no','yes']
tick_marks = np.arange(len(class_labels))
plt.xticks(tick_marks,class_labels)
plt.yticks([-0.5,0,1,1.5],['','no','yes',''])
# plotting text value inside cells
thresh = cf.max() / 2.
for i,j in itertools.product(range(cf.shape[0]),range(cf.shape[1])):
    plt.text(j,i,format(cf[i,j],'d'),horizontalalignment='center',color='white' if cf[i,j] >thresh else 'black')
plt.show()

ValueError: Mix of label input types (string and number)

In [61]:
aci_service.delete()



In [63]:
print(aci_service.state)

Deleting


