# Automated ML

In [3]:
import joblib

from azureml.core import Dataset, Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.model import InferenceConfig
from azureml.core.compute_target import ComputeTargetException
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core import Environment
from azureml.train.automl import AutoMLConfig
from azureml.core.model import Model
from azureml.widgets import RunDetails

In [4]:
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'heart-failure-autml'

exp = Experiment(ws, experiment_name)

In [5]:
cluster_name = "heart-failure-compute"

# Verfiy that cluster does not exist already
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing cluster, use it.")
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", max_nodes=4)
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)

cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset 
### Overview

This data set dates from 1988 and consists of four databases: Cleveland, Hungary, Switzerland, and Long Beach V. It contains 76 attributes, including the predicted attribute, but all published experiments refer to using a subset of 14 of them. The "DEATH EVENT" field refers to the presence of heart disease in the patient. It is integer valued 0 = no disease and 1 = disease


Before we can use Azure AutoML to predict heart failures, we have to upload the dataset to the datastore and subsequently register it:

In [10]:
found = False
key = "Heart-Failure-Dataset"

if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 

if not found:

        datastore = ws.get_default_datastore()

        datastore.upload_files(files=["./heart_failure_clinical_records_dataset.csv"],
                                overwrite=True)

        # Create AML Dataset and register it into Workspace
        example_data = "heart_failure_clinical_records_dataset.csv"
        dataset = Dataset.Tabular.from_delimited_files(path=(datastore, "heart_failure_clinical_records_dataset.csv"))       
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=key)

df = dataset.to_pandas_dataframe()

## AutoML Configuration

The following AutoML Config is used to get the best model:
- compute_target: This parameter specifies the target compute resource where the AutoML experiment will be executed. The cluster already is running is used.
- experiment_timeout_minutes: This parameter sets the maximum amount of time, in minutes, that the AutoML experiment is allowed to run. The experiment should be completed within 30 minutes.
- task: A classification task is specified since the model will be trained to predict a categorical variable.
- primary_metric: This parameter determines the evaluation metric that AutoML will use to optimize and compare the performance of different models. Accuracy is a good fit for a classification task.
- training_data: The Dataset will all relevant data including the target variable.
- label_column_name: Column name of the target variable.
- n_cross_validations: Since cross validations help to assess the model's generalization performance by splitting the training data into multiple subsets for training and validation, 5-fold cross validation is used.

In [11]:
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 3,
    "primary_metric": "accuracy"
}
# Set parameters for AutoMLConfig
automl_config = AutoMLConfig(
    compute_target=cluster,
    task="classification",
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    enable_early_stopping=True,
    featurization='auto',
    **automl_settings
)

In [12]:
# Submit automl run
remote_run = exp.submit(automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on heart-failure-compute with default configuration
Running on remote compute: heart-failure-compute


Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-autml,AutoML_6b718489-5b88-4846-b544-883018174de8,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  In order to accurately evaluate the model(s) trained by AutoML, we leverage a dataset that the model is not trained on. Hence, if the user doesn't provide an explicit validation dataset, a part of the training dataset is used to achieve this. For smaller datasets (fewer than 20,000 samples), cross-validation is leveraged, else a single hold-out set is split from the training data to serve as the validation dataset. Hence, for your input data we leverage cross-validation with 10 folds, if the number of training samples are fewer than 1000, and 3 folds in all other cases.
              Learn mo

## Run Details

In [13]:
RunDetails(remote_run).show()
remote_run.wait_for_completion()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_6b718489-5b88-4846-b544-883018174de8',
 'target': 'heart-failure-compute',
 'status': 'Completed',
 'startTimeUtc': '2023-07-14T08:44:20.468144Z',
 'endTimeUtc': '2023-07-14T08:59:44.747852Z',
 'services': {},
   'message': 'No scores improved over last 10 iterations, so experiment stopped early. This early stopping behavior can be disabled by setting enable_early_stopping = False in AutoMLConfig for notebook/python SDK runs.'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'heart-failure-compute',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"6b0b677e-8899-49e6-bb85-c5e1c503e0d5\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions':

## Best Model

In [14]:
best_model, model = remote_run.get_output()

Package:azureml-automl-runtime, training version:1.51.0.post2, current version:1.49.0
Package:azureml-core, training version:1.51.0, current version:1.49.0
Package:azureml-dataprep, training version:4.10.8, current version:4.9.1
Package:azureml-dataprep-rslex, training version:2.17.12, current version:2.16.1
Package:azureml-dataset-runtime, training version:1.51.0, current version:1.49.0
Package:azureml-defaults, training version:1.51.0, current version:1.49.0
Package:azureml-interpret, training version:1.51.0, current version:1.49.0
Package:azureml-mlflow, training version:1.51.0, current version:1.49.0
Package:azureml-pipeline-core, training version:1.51.0, current version:1.49.0
Package:azureml-responsibleai, training version:1.51.0, current version:1.49.0
Package:azureml-telemetry, training version:1.51.0, current version:1.49.0
Package:azureml-train-automl-client, training version:1.51.0.post1, current version:1.49.0
Package:azureml-train-automl-runtime, training version:1.51.0.po

In [15]:
best_model.get_metrics()

{'recall_score_micro': 0.8964367816091954,
 'AUC_weighted': 0.9167654407758526,
 'f1_score_weighted': 0.8937583689557579,
 'average_precision_score_weighted': 0.9279168952288946,
 'AUC_micro': 0.9211677896683842,
 'average_precision_score_macro': 0.9064557466604016,
 'matthews_correlation': 0.7733091075113918,
 'f1_score_micro': 0.8964367816091954,
 'weighted_accuracy': 0.9115690257261072,
 'accuracy': 0.8964367816091954,
 'recall_score_macro': 0.8722086094683348,
 'balanced_accuracy': 0.8722086094683348,
 'AUC_macro': 0.9167654407758526,
 'average_precision_score_micro': 0.9246408548633764,
 'precision_score_weighted': 0.91093294759824,
 'log_loss': 0.3886299135499442,
 'precision_score_micro': 0.8964367816091954,
 'recall_score_weighted': 0.8964367816091954,
 'precision_score_macro': 0.9040411790726436,
 'f1_score_macro': 0.8760923514762438,
 'norm_macro_recall': 0.7444172189366698,
 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_6b718489-5b88-4846-b544-883018174de8_34

In [16]:
model.steps

[('datatransformer',
  DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, task='classification')),
 ('prefittedsoftvotingclassifier',
  PreFittedSoftVotingClassifier(classification_labels=numpy.array([0, 1]), estimators=[('18', Pipeline(memory=None, steps=[('standardscalerwrapper', StandardScalerWrapper(copy=True, with_mean=False, with_std=False)), ('xgboostclassifier', XGBoostClassifier(booster='gbtree', colsample_bytree=0.5, eta=0.5, gamma=0, max_depth=6, max_leaves=3, n_estimators=10, n_jobs=1, objective='reg:logistic', problem_info=ProblemInfo(gpu_training_param_dict={'processing_unit_type': 'cpu'}), random_state=0, reg_alpha=0.7291666666666667, reg_lambda=2.3958333333333335, subsample=0.8, tree_method='auto'))], verbose=False)), ('28', Pipeline(memory=None, steps=[('sparsenormalizer', Normalizer(copy=True, nor

In [19]:
model = best_model.register_model(model_name="best_automl_model", model_path="outputs/model.pkl")chach

In [7]:
model = Model(ws, "best_automl_model")

## Model Deployment

In [8]:
# Create a deployment configuration
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

inference_env = Environment.get(ws, "AzureML-AutoML")

# Define the inference configuration
inference_config = InferenceConfig(entry_script="./score.py", environment=inference_env)

# Deploy the model to AKS
service = Model.deploy(ws, "heart-failure-service", [model], inference_config, deployment_config)
service.wait_for_deployment(show_output=True)
service.scoring_uri

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(ws, "heart-failure-service", [model], inference_config, deployment_config)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2023-07-14 09:18:49+00:00 Creating Container Registry if not exists.
2023-07-14 09:18:49+00:00 Registering the environment.
2023-07-14 09:18:50+00:00 Use the existing image.
2023-07-14 09:18:51+00:00 Submitting deployment to compute.
2023-07-14 09:18:57+00:00 Checking the status of deployment heart-failure-service..
2023-07-14 09:21:04+00:00 Checking the status of inference endpoint heart-failure-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


'http://b32fe92c-727b-468b-98fb-f6d67df91c6a.westeurope.azurecontainer.io/score'

In [None]:
# Enable app insights
service.update(enable_app_insights=True)
srvice.get_logs()

### Send request to model endpoint

In [9]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data =  {
  "data": [
    {
      "age": 0.0,
      "anaemia": 0,
      "creatinine_phosphokinase": 0,
      "diabetes": 0,
      "ejection_fraction": 0,
      "high_blood_pressure": 0,
      "platelets": 0.0,
      "serum_creatinine": 0.0,
      "serum_sodium": 0,
      "sex": 0,
      "smoking": 0,
      "time": 0
    }
  ]
}

body = str.encode(json.dumps(data))

url = 'http://b32fe92c-727b-468b-98fb-f6d67df91c6a.westeurope.azurecontainer.io/score'


headers = {'Content-Type':'application/json'}

req = urllib.request.Request(url, body, headers)

try:
    response = urllib.request.urlopen(req)

    result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the requert ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

b'"{\\"result\\": [1]}"'


In [None]:
service.delete()
cluster.delete()