# Deploying a Subscription Classifier
I built a classifier to predict if the client will subscribe to a term deposit with the bank, and deployed it to an Azure Container Instance (ACI). Here are the details of the [dataset](https://archive.ics.uci.edu/ml/datasets/bank+marketing).

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.28.0


## Setup

#### Create an Azure Workspace

In [3]:
# from dotenv import load_dotenv

# load_dotenv()
# ws = Workspace.create(name='bank_marketing',
#                subscription_id=os.getenv('subscription_id'),
#                resource_group='rg20210512',
#                create_resource_group=True,
#                location='westus2'
#                )
ws = Workspace.from_config()



Deploying AppInsights with name bankmarkinsights5ab9d9d8.
Deployed AppInsights with name bankmarkinsights5ab9d9d8. Took 4.97 seconds.
Deploying StorageAccount with name bankmarkstorage8e1b54384.
Deploying KeyVault with name bankmarkkeyvault9ac738dd.
Deployed KeyVault with name bankmarkkeyvault9ac738dd. Took 19.62 seconds.
Deployed StorageAccount with name bankmarkstorage8e1b54384. Took 24.56 seconds.
Deploying Workspace with name bank_marketing.
Deployed Workspace with name bank_marketing. Took 21.34 seconds.


#### Create an Azure ML experiment

In [4]:
experiment_name = 'pipeline'

experiment = Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
pipeline,bank_marketing,Link to Azure Machine Learning studio,Link to Documentation


#### Create or Attach an AmlCompute cluster

In [5]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
cluster_name = "cpu"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', # for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

Creating...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded..................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## Data

In [85]:
from train import data_split
from azureml.core.dataset import Dataset 

train_data, val_data, test_data = data_split()
datastore = ws.get_default_datastore()
train_ds = Dataset.Tabular.register_pandas_dataframe(dataframe=train_data, 
                                                     target=datastore, 
                                                     name='train_data')
val_ds = Dataset.Tabular.register_pandas_dataframe(dataframe=val_data, 
                                                   target=datastore, 
                                                   name='val_data')
test_ds = Dataset.Tabular.register_pandas_dataframe(dataframe=test_data, 
                                                    target=datastore, 
                                                    name='test_data')



Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/983a70fd-cc92-4876-8908-e4957f7e639d/
Successfully uploaded file to datastore.
Creating and registering a new dataset.




Successfully created and registered a new dataset.
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/0275f4cf-c467-40cb-af59-db87a14f6649/
Successfully uploaded file to datastore.
Creating and registering a new dataset.




Successfully created and registered a new dataset.
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/63817e60-12d3-4897-bba0-6dd893cf5950/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


#### Review the Dataset Result

In [7]:
train_ds.take(5).to_pandas_dataframe()

Unnamed: 0,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,job_unemployed,...,age,duration,campaign,pdays,previous,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed
0,0,1,0,0,0,0,0,0,0,0,...,-0.194227,1.445235,-0.565922,-5.06384,1.671136,-1.134279,0.779734,0.475915,-1.570139,-2.428157
1,0,0,0,0,0,0,0,0,1,0,...,0.573445,0.079895,-0.565922,0.195414,-0.349494,-1.197935,-0.864955,-1.425496,-1.267445,-0.940281
2,0,0,0,0,0,0,0,0,0,0,...,-0.674021,2.455741,3.405226,0.195414,-0.349494,-1.197935,-1.17938,-1.231034,-1.37065,-0.940281
3,1,0,0,0,0,0,0,0,0,0,...,-0.76998,-0.355933,-0.565922,0.195414,-0.349494,0.839061,0.591424,-0.474791,0.770116,0.84517
4,1,0,0,0,0,0,0,0,0,0,...,0.381527,-0.81876,0.156105,0.195414,-0.349494,0.839061,0.591424,-0.474791,0.772999,0.84517


## Train

In [9]:
from azureml.train.automl import AutoMLConfig

automl_settings = {
    "experiment_timeout_hours" : 0.5,
    "enable_early_stopping" : True,
    "iteration_timeout_minutes": 5,
    "max_concurrent_iterations": 4,
    "max_cores_per_iteration": -1,
    "primary_metric": 'AUC_weighted',
    "featurization": 'off'
}

automl_config = AutoMLConfig(task = 'classification',
                             debug_log = 'automl_errors.log',
                             compute_target=compute_target,
                             experiment_exit_score = 0.95,
                             enable_onnx_compatible_models=True,
                             training_data = train_ds,
                             label_column_name = 'y_yes',
                             validation_data = val_ds,
                             **automl_settings
                            )
automl_run = experiment.submit(automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on cpu with default configuration
Running on remote compute: cpu


Experiment,Id,Type,Status,Details Page,Docs Page
pipeline,AutoML_73691217-5c92-4ed4-abf7-42f6cec09285,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   

#### Create Pipeline and AutoMLStep

In [11]:
from azureml.pipeline.core import PipelineData, TrainingOutput

metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

metrics_data = PipelineData(name='metrics_data',
                           datastore=datastore,
                           pipeline_output_name=metrics_output_name,
                           training_output=TrainingOutput(type='Metrics'))
model_data = PipelineData(name='model_data',
                           datastore=datastore,
                           pipeline_output_name=best_model_output_name,
                           training_output=TrainingOutput(type='Model'))

Create an AutoMLStep.

In [12]:
automl_step = AutoMLStep(
    name='automl_module',
    automl_config=automl_config,
    outputs=[metrics_data, model_data],
    allow_reuse=True)

In [13]:
from azureml.pipeline.core import Pipeline
pipeline = Pipeline(
    description="pipeline_with_automlstep",
    workspace=ws,    
    steps=[automl_step])

In [14]:
pipeline_run = experiment.submit(pipeline)



Created step automl_module [bc477ca8][9740bd02-5e42-4c52-ae42-489d1a658752], (This step will run and generate new outputs)
Submitted PipelineRun 19866ddc-643d-4ebb-96b0-5f619b4ef9a7
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/19866ddc-643d-4ebb-96b0-5f619b4ef9a7?wsid=/subscriptions/45a69fd7-1b5c-4963-a9c8-1c33e27e9b14/resourcegroups/rg20210512/workspaces/bank_marketing&tid=10e19cba-5b4d-42f0-a5b1-0e066efe7fe1


In [15]:
from azureml.widgets import RunDetails
RunDetails(pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [16]:
pipeline_run.wait_for_completion()

PipelineRunId: 19866ddc-643d-4ebb-96b0-5f619b4ef9a7
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/19866ddc-643d-4ebb-96b0-5f619b4ef9a7?wsid=/subscriptions/45a69fd7-1b5c-4963-a9c8-1c33e27e9b14/resourcegroups/rg20210512/workspaces/bank_marketing&tid=10e19cba-5b4d-42f0-a5b1-0e066efe7fe1
PipelineRun Status: Running


StepRunId: ec6c6110-066a-4e38-aee0-a06c02f168d7
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/ec6c6110-066a-4e38-aee0-a06c02f168d7?wsid=/subscriptions/45a69fd7-1b5c-4963-a9c8-1c33e27e9b14/resourcegroups/rg20210512/workspaces/bank_marketing&tid=10e19cba-5b4d-42f0-a5b1-0e066efe7fe1
StepRun( automl_module ) Status: Running

StepRun(automl_module) Execution Summary
StepRun( automl_module ) Status: Finished



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '19866ddc-643d-4ebb-96b0-5f619b4ef9a7', 'status': 'Completed', 'startTimeUtc': '2021-05-12T15:48:53.078323Z', 'endTimeUtc': '2021-05-12T16:15:45.509148Z', 'properties': 

'Finished'

#### Retrieve the metrics of all child runs

In [17]:
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress=True)

Downloading azureml/ec6c6110-066a-4e38-aee0-a06c02f168d7/metrics_data
Downloaded azureml/ec6c6110-066a-4e38-aee0-a06c02f168d7/metrics_data, 1 files out of an estimated total of 1


In [18]:
import json
with open(metrics_output._path_on_datastore) as f:
    metrics_output_result = f.read()
    
deserialized_metrics_output = json.loads(metrics_output_result)
df = pd.DataFrame(deserialized_metrics_output)
df

Unnamed: 0,ec6c6110-066a-4e38-aee0-a06c02f168d7_0,ec6c6110-066a-4e38-aee0-a06c02f168d7_2
accuracy,[0.9430084085954531],[0.8470881345375273]
recall_score_macro,[0.9430084085954531],[0.8470881345375272]
precision_score_weighted,[0.9453331808475964],[0.8483861876411893]
recall_score_micro,[0.9430084085954531],[0.8470881345375273]
weighted_accuracy,[0.9430084085954531],[0.8470881345375273]
f1_score_macro,[0.9429339331158928],[0.8469455680499125]
AUC_micro,[0.9905319527500115],[0.9267263021917127]
matthews_correlation,[0.8883385474964932],[0.6954731108160008]
f1_score_micro,[0.9430084085954531],[0.8470881345375273]
precision_score_macro,[0.9453331808475964],[0.8483861876411893]


#### Retrieve the Best Model

In [19]:
# Retrieve best model from Pipeline Run
best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)
num_file_downloaded = best_model_output.download('.', show_progress=True)

Downloading azureml/ec6c6110-066a-4e38-aee0-a06c02f168d7/model_data
Downloaded azureml/ec6c6110-066a-4e38-aee0-a06c02f168d7/model_data, 1 files out of an estimated total of 1


In [20]:
import pickle

with open(best_model_output._path_on_datastore, "rb" ) as f:
    best_model = pickle.load(f)
best_model

Pipeline(memory=None,
         steps=[('MaxAbsScaler', MaxAbsScaler(copy=True)),
                ('LightGBMClassifier',
                 LightGBMClassifier(min_data_in_leaf=20, n_jobs=-1, problem_info=ProblemInfo(
    dataset_samples=6422,
    dataset_features=53,
    dataset_classes=2,
    dataset_num_categorical=0,
    dataset_categoricals=None,
    pipeline_categoricals=None,
    dataset_y_std=None,
    dataset_uid=None,
    subsampling...
    subsampling_schedule='hyperband_clip',
    cost_mode_param=None,
    iteration_timeout_mode=0,
    iteration_timeout_param=None,
    feature_column_names=None,
    label_column_name=None,
    weight_column_name=None,
    cv_split_column_names=None,
    enable_streaming=None,
    timeseries_param_dict=None,
    gpu_training_param_dict={'processing_unit_type': 'cpu'}
), random_state=None))],
         verbose=False)

In [21]:
best_model.steps

[('MaxAbsScaler', MaxAbsScaler(copy=True)),
 ('LightGBMClassifier',
  LightGBMClassifier(
      min_data_in_leaf=20,
      random_state=None,
      n_jobs=-1,
      problem_info=ProblemInfo(
          dataset_samples=6422,
          dataset_features=53,
          dataset_classes=2,
          dataset_num_categorical=0,
          dataset_categoricals=None,
          pipeline_categoricals=None,
          dataset_y_std=None,
          dataset_uid=None,
          subsampling=False,
          task='classification',
          metric=None,
          num_threads=-1,
          pipeline_profile='none',
          is_sparse=False,
          runtime_constraints={'mem_in_mb': None, 'wall_time_in_s': 300, 'total_wall_time_in_s': 31449600, 'cpu_time_in_s': None, 'num_processes': None, 'grace_period_in_s': None},
          constraint_mode=1,
          cost_mode=1,
          training_percent=None,
          num_recommendations=1,
          model_names_whitelisted=None,
          model_names_blacklisted=N

#### Load Test Data

In [22]:
X_test = test_data.drop(columns=['y_yes'])
y_test = test_data['y_yes']

#### Testing the Best Fitted Model

In [23]:
from sklearn.metrics import confusion_matrix
ypred = best_model.predict(X_test)
cm = confusion_matrix(y_test, ypred)

In [24]:
# Visualize the confusion matrix
pd.DataFrame(cm).style.background_gradient(cmap='Blues', 
                                           low=0, 
                                           high=0.9)

Unnamed: 0,0,1
0,6085,1179
1,58,916


#### Publish and run from REST endpoint

In [25]:
published_pipeline = pipeline_run.publish_pipeline(
    name="Bankmarketing Train", 
    description="Training bankmarketing pipeline", 
    version="1.0")

published_pipeline

Name,Id,Status,Endpoint
Bankmarketing Train,f352e34b-6570-470d-969f-25846216bb93,Active,REST Endpoint


In [26]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Authenticate once again, to retrieve the auth_header so that the endpoint can be used
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

In [27]:
import requests

# Get the REST url from the endpoint property of the published pipeline object
rest_endpoint = published_pipeline.endpoint

# Build an HTTP POST request to the endpoint, specifying the authentication header
# Add a JSON payload object with the experiment name and the batch size parameter 
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": "pipeline-rest-endpoint"}
                        )

In [28]:
try:
    response.raise_for_status()
except Exception:    
    raise Exception("Received bad response from the endpoint: {}\n"
                    "Response Code: {}\n"
                    "Headers: {}\n"
                    "Content: {}".format(rest_endpoint, 
                                         response.status_code, 
                                         response.headers, 
                                         response.content))

# Access the Id key from the response dict to get the value of the run id
run_id = response.json().get('Id')
print('Submitted pipeline run: ', run_id)

Submitted pipeline run:  568cc420-e5b4-4059-9fdf-9dc219f363df


In [29]:
from azureml.pipeline.core.run import PipelineRun
from azureml.widgets import RunDetails

# Use the run id to monitor the status of the new run
published_pipeline_run = PipelineRun(
    ws.experiments["pipeline-rest-endpoint"], 
    run_id)
RunDetails(published_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

#### Explain the best model

In [51]:
from azureml.train.automl.run import AutoMLRun

# Retrieve the automl step from pipeline_run
automl_run_id = pipeline_run.find_step_run('automl_module')[0].id
remote_run = AutoMLRun(experiment=experiment, run_id=automl_run_id)
remote_run

Experiment,Id,Type,Status,Details Page,Docs Page
pipeline,ec6c6110-066a-4e38-aee0-a06c02f168d7,azureml.StepRun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [53]:
# Retrieve the best run and the best model
best_run, best_model = remote_run.get_output()
best_run, best_model

(Run(Experiment: pipeline,
 Id: ec6c6110-066a-4e38-aee0-a06c02f168d7_0,
 Type: azureml.scriptrun,
 Status: Completed),
 Pipeline(memory=None,
          steps=[('MaxAbsScaler', MaxAbsScaler(copy=True)),
                 ('LightGBMClassifier',
                  LightGBMClassifier(min_data_in_leaf=20, n_jobs=-1, problem_info=ProblemInfo(
     dataset_samples=6422,
     dataset_features=53,
     dataset_classes=2,
     dataset_num_categorical=0,
     dataset_categoricals=None,
     pipeline_categoricals=None,
     dataset_y_std=None,
     dataset_uid=None,
     subsampling...
     subsampling_schedule='hyperband_clip',
     cost_mode_param=None,
     iteration_timeout_mode=0,
     iteration_timeout_param=None,
     feature_column_names=None,
     label_column_name=None,
     weight_column_name=None,
     cv_split_column_names=None,
     enable_streaming=None,
     timeseries_param_dict=None,
     gpu_training_param_dict={'processing_unit_type': 'cpu'}
 ), random_state=None))],
          ve

In [56]:
# Wait for the best model explanation run to complete
from azureml.core.run import Run
model_explainability_run_id = remote_run.id + "_" + "ModelExplain"
print(model_explainability_run_id)
model_explainability_run = Run(experiment=experiment, run_id=model_explainability_run_id)
model_explainability_run.wait_for_completion()

ec6c6110-066a-4e38-aee0-a06c02f168d7_ModelExplain


{'runId': 'ec6c6110-066a-4e38-aee0-a06c02f168d7_ModelExplain',
 'target': 'cpu',
 'status': 'Completed',
 'startTimeUtc': '2021-05-12T16:33:25.262997Z',
 'endTimeUtc': '2021-05-12T16:36:31.903062Z',
 'properties': {'azureml.runsource': 'automl',
  'parentRunId': 'ec6c6110-066a-4e38-aee0-a06c02f168d7_0',
  '_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'a99a7cf3-9338-475c-abb1-7578ed6c87d0',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json',
  'dependencies_versions': '{"azureml-train-automl": "1.28.0", "azureml-train-automl-runtime": "1.28.0", "azureml-train-automl-client": "1.28.0", "azureml-telemetry": "1.28.0", "azureml-model-management-sdk": "1.0.1b6.post1", "azureml-interpret": "1.28.0", "azureml-explain-model": "1.28.0", "azureml-defaults": "1.28.0", "azureml-dataset-runtime": "1.28.0", "azureml-dataprep": "2.15.0", "azureml-dataprep-rslex": "1.13.0", "azureml-dataprep-native": "33.0.0", "azureml-c

In [58]:
from azureml.interpret import ExplanationClient
client = ExplanationClient.from_run(best_run)
engineered_explanations = client.download_model_explanation(raw=False)
# Get feature importances
exp_data = engineered_explanations.get_feature_importance_dict()
exp_data

{'duration': 1.9781721507942636,
 'nr_employed': 1.0711534146409065,
 'emp_var_rate': 0.6246221486750684,
 'euribor3m': 0.282648725728944,
 'cons_conf_idx': 0.24776518198458516,
 'pdays': 0.1618717905377934,
 'cons_price_idx': 0.13062487653961488,
 'age': 0.10128222616860082,
 'campaign': 0.07588997592152479,
 'month_may': 0.049932333464452534,
 'default_unknown': 0.04551160255811471,
 'poutcome_success': 0.03576375146857658,
 'education_university_degree': 0.02790740906283665,
 'job_blue-collar': 0.0277027720741297,
 'housing_yes': 0.02476769454879619,
 'poutcome_nonexistent': 0.021855194489417924,
 'month_oct': 0.02100486675968618,
 'day_of_week_thu': 0.017848323708742692,
 'day_of_week_mon': 0.017515105375979702,
 'month_jul': 0.017445928265125466,
 'previous': 0.014906317421153713,
 'education_basic_9y': 0.013871839960255537,
 'month_mar': 0.013692754764256196,
 'contact_telephone': 0.013217678945706038,
 'day_of_week_tue': 0.012575170375694533,
 'job_self-employed': 0.011164537803

## Deploy

#### Register the best model

In [60]:
model_name = best_run.properties['model_name']

script_file_name = 'inference/score.py'

best_run.download_file('outputs/scoring_file_v_1_0_0.py', 
                       'inference/score.py')

In [62]:
description = 'MaxAbsScaler/LightGBM model trained on bank marketing data to predict if a client will subscribe to a term deposit'
tags = None
model = remote_run.register_model(model_name = model_name, 
                                  description = description, 
                                  tags = tags)

print(remote_run.model_id)

ec6c6110066a4e30


#### Deploy the model to ACI

In [65]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

inference_config = InferenceConfig(entry_script=script_file_name)

aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1, 
                                               tags = {'area': "bank_marketing", 'type': "classification"}, 
                                               description = 'ACI_service')

aci_service_name = 'bank-marketing-aci-service'
aci_service = Model.deploy(ws, 
                           aci_service_name, 
                           [model], 
                           inference_config, 
                           aciconfig)
aci_service.wait_for_deployment(True)
print(aci_service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-12 14:22:50-05:00 Creating Container Registry if not exists.
2021-05-12 14:22:53-05:00 Use the existing image.
2021-05-12 14:22:53-05:00 Generating deployment configuration.
2021-05-12 14:22:54-05:00 Submitting deployment to compute..
2021-05-12 14:23:00-05:00 Checking the status of deployment bank-marketing-aci-service..
2021-05-12 14:28:43-05:00 Checking the status of inference endpoint bank-marketing-aci-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [66]:
aci_service.get_logs()

'2021-05-12T19:28:41,157456100+00:00 - gunicorn/run \n2021-05-12T19:28:41,161993800+00:00 - rsyslog/run \n2021-05-12T19:28:41,161061600+00:00 - iot-server/run \n2021-05-12T19:28:41,265635400+00:00 - nginx/run \nrsyslogd: /azureml-envs/azureml_48d60bd6e7fab6edf5a4021f49cfe5d3/lib/libuuid.so.1: no version information available (required by rsyslogd)\nEdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...\n2021-05-12T19:28:41,635256700+00:00 - iot-server/finish 1 0\n2021-05-12T19:28:41,638970600+00:00 - Exit code 1 is normal. Not restarting iot-server.\nStarting gunicorn 20.1.0\nListening at: http://127.0.0.1:31311 (67)\nUsing worker: sync\nworker timeout is set to 300\nBooting worker with pid: 95\nSPARK_HOME not set. Skipping PySpark Initialization.\nGenerating new fontManager, this may take some time...\nInitializing logger\n2021-05-12 19:28:44,054 | root | INFO | Starting up app insights client\n2021-05-12 19:28:44,055 | root | INFO | Starting up request id generato

## Test

In [67]:
import json
import requests

X_test_json = X_test.to_json(orient='records')
data = "{\"data\": " + X_test_json +"}"
headers = {'Content-Type': 'application/json'}

resp = requests.post(aci_service.scoring_uri, data, headers=headers)

y_pred = json.loads(json.loads(resp.text))['result']

In [73]:
cm = confusion_matrix(y_test, y_pred)
# Visualize the confusion matrix
pd.DataFrame(cm).style.background_gradient(cmap='Blues', 
                                           low=0, 
                                           high=0.9)

Unnamed: 0,0,1
0,6084,1180
1,58,916


## Save Model

#### Retrieve and Save the Best ONNX Model

In [77]:
from azureml.automl.runtime.onnx_convert import OnnxConverter
from azureml.train.automl import constants
if not os.path.isdir('onnx'):
    os.mkdir('onnx')
onnx_fl_path = "onnx/best_model.onnx"
best_run, onnx_mdl = remote_run.get_output(return_onnx_model=True)
OnnxConverter.save_onnx_model(onnx_mdl, 
                              onnx_fl_path)
res_path = "onnx/onnx_resource.json"
best_run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX, 
                       output_file_path=res_path)


#### Load ONNX Model

In [78]:
import onnx
import json
onnx_model = onnx.load(onnx_fl_path)
with open(res_path) as f:
    onnx_res = json.load(f)

#### Predict with the ONNX model, using onnxruntime package

In [79]:
import sys
import json
from azureml.automl.core.onnx_convert import OnnxConvertConstants
import onnxruntime
from azureml.automl.runtime.onnx_convert import OnnxInferenceHelper

if sys.version_info < OnnxConvertConstants.OnnxIncompatiblePythonVersion:
    python_version_compatible = True
else:
    python_version_compatible = False

if python_version_compatible:
    test_df = test_ds.to_pandas_dataframe()
    mdl_bytes = onnx_model.SerializeToString()
    onnxrt_helper = OnnxInferenceHelper(mdl_bytes, onnx_res)
    pred_onnx, pred_prob_onnx = onnxrt_helper.predict(test_df)

    print(pred_onnx)
    print(pred_prob_onnx)
else:
    print('Please use Python version 3.6 or 3.7 to run the inference helper.')

[1 1 1 ... 1 1 1]
[[0.02743155 0.97256845]
 [0.3542506  0.6457494 ]
 [0.47314328 0.5268567 ]
 ...
 [0.02073169 0.9792683 ]
 [0.11396033 0.8860397 ]
 [0.00473678 0.9952632 ]]


In [86]:
cm = confusion_matrix(test_df['y_yes'], pred_onnx)
# Visualize the confusion matrix
pd.DataFrame(cm).style.background_gradient(cmap='Blues', 
                                           low=0, 
                                           high=0.9)

Unnamed: 0,0,1
0,2912,299
1,67,3144
