# Automated ML

In [1]:
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.core.model import InferenceConfig 
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model

import pandas as pd

In [2]:
# Load the workspace from the saved workspace configuration and echo its identity.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

# Every run submitted from this notebook is grouped under this experiment.
experiment_name = 'capstone-heart-failure-prediction'
experiment = Experiment(ws, experiment_name)

# No interactive run is opened here: the previous `experiment.start_logging()`
# call created a run that was never used or completed, leaving it orphaned in
# the experiment. The AutoML run is created later by `experiment.submit(...)`.

# Last expression of the cell, so the experiment summary is actually displayed.
experiment

ws_udacity_capstone
UdacityMLAzureCapstone
eastus2
ca1598e0-85dc-47d5-b06d-41b5342b4989


## Create New Cluster / Use Existing Cluster

In [3]:
# Name of the compute cluster to reuse, or to create when it is missing
cpu_cluster_name = "MLAzureCapstoneCompute"

try:
    # Look the cluster up by name; the except branch handles the lookup failure
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    print('Creating a new compute cluster...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2',
        min_nodes=1,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait for the cluster operation to finish, showing progress in the cell output.
# No minimum node count or timeout is passed here.
compute_target.wait_for_completion(show_output=True)

# Print the detailed status of the cluster in serialized form
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing cluster, use it.

Running
{'errors': [], 'creationTime': '2021-04-22T16:36:06.518630+00:00', 'createdBy': {'userObjectId': '2f5770ca-7bf7-4ecc-bd4b-14652b1bbf0c', 'userTenantId': '3814e260-63cb-43a8-82ce-e862c309e004', 'userName': 'Abhi Ojha'}, 'modifiedTime': '2021-04-22T16:38:24.852462+00:00', 'state': 'Running', 'vmSize': 'STANDARD_DS3_V2'}


## Dataset

### Overview

The dataset used in this project is taken from [Kaggle](https://www.kaggle.com/andrewmvd/heart-failure-clinical-data). It consists of 12 distinct features and 1 target, as summarized below:
- **Input features** - Age, Anaemia, Creatinine_phosphokinase, Diabetes, Ejection_fraction, High_blood_pressure, Platelets, Serum_creatinine, Serum_sodium, Sex, Smoking, Time
- **Target** - DEATH_EVENT

We will use this dataset for creating a model to predict mortality caused by heart failure.

In [4]:
# Relative path of the CSV; the same path is used locally and inside the datastore
local_data_path = 'data/heart_failure_clinical_records_dataset.csv'

# Upload the local 'data' folder into the 'data' folder of the default datastore
datastore = ws.get_default_datastore()
datastore.upload(src_dir='data', target_path='data')

# Build a tabular dataset that points at the uploaded CSV in the datastore
datastore_csv = (datastore, local_data_path)
heart_failure_ds = Dataset.Tabular.from_delimited_files(path=[datastore_csv])

# Register the dataset in the workspace, creating a new version on each run
heart_failure_ds = heart_failure_ds.register(
    workspace=ws,
    name='heart_failure_ds',
    create_new_version=True,
)

# Load the local copy into pandas and preview the first rows
df = pd.read_csv(local_data_path)
df.head()

Uploading an estimated of 1 files
Target already exists. Skipping upload for data/heart_failure_clinical_records_dataset.csv
Uploaded 0 files


Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration used below.

In [6]:
# Validation, metric and budget settings, forwarded to AutoMLConfig as keyword arguments
automl_settings = dict(
    n_cross_validations=2,
    primary_metric='accuracy',
    enable_early_stopping=True,
    max_concurrent_iterations=4,
    experiment_timeout_minutes=15,
)

# Classification on the registered dataset, with DEATH_EVENT as the label column,
# executed on the compute cluster prepared earlier in the notebook
automl_config = AutoMLConfig(
    task='classification',
    compute_target=compute_target,
    training_data=heart_failure_ds,
    label_column_name='DEATH_EVENT',
    featurization='auto',
    enable_onnx_compatible_models=True,
    path='automl_runs',
    debug_log="automl_errors.log",
    **automl_settings,
)

In [7]:
# Submit the AutoML configuration to the experiment with output shown in the cell,
# then wait for the run to complete
automl_run = experiment.submit(automl_config, show_output=True)
automl_run.wait_for_completion()

# Report the status of the run
run_status = automl_run.get_status()
print(f"Run Status: {run_status}")

Submitting remote run.
No run_configuration provided, running on MLAzureCapstoneCompute with default configuration
Running on remote compute: MLAzureCapstoneCompute


Experiment,Id,Type,Status,Details Page,Docs Page
capstone-heart-failure-prediction,AutoML_99f14971-4f03-446c-aed7-c4fd72ad11a7,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

******************************************************************

## Run Details

In [8]:
# Display the run-details widget for the AutoML run
run_details_widget = RunDetails(automl_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model



In [11]:
# get_output() yields a pair: the best run and its fitted model
best_automl_run, fit_model = automl_run.get_output()

# Print the metrics and then the properties recorded on the best run
best_run_metrics = best_automl_run.get_metrics()
print("Best run metrics :", best_run_metrics)
best_run_properties = best_automl_run.get_properties()
print("Best run properties :", best_run_properties)

Package:azureml-automl-runtime, training version:1.27.0.post1, current version:1.26.0
Package:azureml-core, training version:1.27.0, current version:1.26.0
Package:azureml-dataprep, training version:2.14.2, current version:2.13.2
Package:azureml-dataprep-native, training version:33.0.0, current version:32.0.0
Package:azureml-dataprep-rslex, training version:1.12.1, current version:1.11.2
Package:azureml-dataset-runtime, training version:1.27.0, current version:1.26.0
Package:azureml-defaults, training version:1.27.0, current version:1.26.0
Package:azureml-interpret, training version:1.27.0, current version:1.26.0
Package:azureml-mlflow, training version:1.27.0, current version:1.26.0
Package:azureml-pipeline-core, training version:1.27.0, current version:1.26.0
Package:azureml-telemetry, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-client, training version:1.27.0, current version:1.26.0
Package:azureml-train-automl-runtime, training version:1.27.0.post1,

Best run metrics : {'matthews_correlation': 0.67935260054681, 'norm_macro_recall': 0.6297318801124832, 'precision_score_micro': 0.8629306487695749, 'AUC_micro': 0.9072508115250064, 'recall_score_micro': 0.8629306487695749, 'AUC_macro': 0.89809500796316, 'f1_score_micro': 0.8629306487695749, 'precision_score_weighted': 0.8664304925332674, 'recall_score_weighted': 0.8629306487695749, 'f1_score_weighted': 0.8570995413243072, 'average_precision_score_macro': 0.883063656792922, 'average_precision_score_weighted': 0.9056582482126201, 'precision_score_macro': 0.8675485479990481, 'average_precision_score_micro': 0.9069318197930294, 'weighted_accuracy': 0.9002965233646666, 'AUC_weighted': 0.89809500796316, 'balanced_accuracy': 0.8148659400562416, 'f1_score_macro': 0.8311220225318399, 'log_loss': 0.4168406216112876, 'accuracy': 0.8629306487695749, 'recall_score_macro': 0.8148659400562416, 'accuracy_table': 'aml://artifactId/ExperimentRun/dcid.AutoML_99f14971-4f03-446c-aed7-c4fd72ad11a7_47/accura

In [12]:
# Download the best run's 'outputs/model.pkl' artifact to a local file
best_automl_run.download_file(
    name='outputs/model.pkl',
    output_file_path='best_automl_model.pkl',
)
print(best_automl_run)

Run(Experiment: capstone-heart-failure-prediction,
Id: AutoML_99f14971-4f03-446c-aed7-c4fd72ad11a7_47,
Type: azureml.scriptrun,
Status: Completed)


## Model Deployment

In [18]:
# Register the model from the AutoML run under the given name and print its id
model = automl_run.register_model(model_name = 'best_automl_model.pkl')
print(automl_run.model_id)

# Reuse the best run's environment and its generated scoring script for inference
environment = best_automl_run.get_environment()
entry_script='inference/scoring.py'
best_automl_run.download_file('outputs/scoring_file_v_1_0_0.py', entry_script)

inference_config = InferenceConfig(entry_script = entry_script, environment = environment)

# Deploy model using ACI WebService, with key authentication and
# Application Insights enabled

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                                    memory_gb = 1, 
                                                    auth_enabled= True, 
                                                    enable_app_insights= True)

# overwrite=True replaces an existing "aciservice" endpoint instead of failing
# on a name conflict, so this cell can be re-run (e.g. after an interrupted
# session that never reached the service.delete() cell).
service = Model.deploy(ws, "aciservice", [model], inference_config, deployment_config,
                       overwrite=True)
service.wait_for_deployment(show_output = True)

best_automl_model.pkl
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-04-25 08:51:29+00:00 Creating Container Registry if not exists.
2021-04-25 08:51:29+00:00 Registering the environment.
2021-04-25 08:51:29+00:00 Use the existing image.
2021-04-25 08:51:30+00:00 Generating deployment configuration.
2021-04-25 08:51:30+00:00 Submitting deployment to compute.
2021-04-25 08:51:34+00:00 Checking the status of deployment aciservice..
2021-04-25 08:55:01+00:00 Checking the status of inference endpoint aciservice.
Succeeded
ACI service creation operation finished, operation "Succeeded"


TODO: In the cell below, send a request to the web service you deployed to test it.

In [20]:
# Run the local endpoint.py script; whatever it prints appears as this cell's output.
# NOTE(review): presumably the script sends a sample request to the deployed
# web service -- confirm against endpoint.py.
%run endpoint.py

{"result": [1]}


TODO: In the cell below, print the logs of the web service and delete the service

In [21]:
# Retrieve the logs of the deployed web service and print them
service_logs = service.get_logs()
print(service_logs)

2021-04-25T08:54:55,665359700+00:00 - iot-server/run 
2021-04-25T08:54:55,666891300+00:00 - rsyslog/run 
2021-04-25T08:54:55,680748200+00:00 - gunicorn/run 
2021-04-25T08:54:55,697176000+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_38f1e90f927390c3641ac84304d53445/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_38f1e90f927390c3641ac84304d53445/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_38f1e90f927390c3641ac84304d53445/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_38f1e90f927390c3641ac84304d53445/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_38f1e90f927390c3641ac84304d53445/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [22]:
# Delete the deployed web service now that it has been tested and its logs printed
service.delete()