# Automated ML

Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import os
import logging
import csv

import azureml.core
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig

from azureml.core import Datastore
from azureml.core.dataset import Dataset

from azureml.train.automl import AutoMLConfig

from azureml.widgets import RunDetails

1.33.0


In [None]:
# Print the installed Azure ML SDK version so the environment used for this notebook is recorded.
print(azureml.core.VERSION)

Create an instance of Workspace and get an Experiment instance.

In [3]:
# Load the workspace from the saved configuration.
ws = Workspace.from_config()

# Names used for the experiment and for the AutoML project folder.
experiment_name = 'udacity-capstone-project'
project_folder = "automl-heartfailure"

# Reuse the experiment when one with this name is already listed in the
# workspace; otherwise create it.
experiments = Experiment.list(ws, experiment_name=experiment_name)
experiment = experiments[0] if experiments else Experiment(workspace=ws, name=experiment_name)

# Workspace summary, one field per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Open an interactive run on the experiment.
run = experiment.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.


Note, we have launched a browser for you to login. For old experience with device code, use "az login --use-device-code"


KeyboardInterrupt: 

## Dataset

### Overview
Cardiovascular diseases (CVDs) are the number 1 cause of death globally. 
CVDs commonly cause heart failure. 
Early detection of heart failure is one way of addressing the problem. 
Here we use a machine learning approach to build a classification model based on a Heart Failure Prediction dataset. 
This dataset is available on Kaggle. 
The dataset consists of 12 features describing cardiovascular risk factors, such as hypertension (high blood pressure), diabetes, anaemia and so on.

TODO: Also mention the task you will be performing.

The goal is to build a binary classification model that predicts heart failure, using the `DEATH_EVENT` column as the label.

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [None]:
from train import clean_data

from azureml.data.dataset_factory import TabularDatasetFactory

# Look up the registered dataset by name.
key = "HeartFailurePrediction"
description_text = "Heart Failure Prediction DataSet for Udacity Capstone Project"

# `found` records whether the dataset is registered in the workspace.
# `dataset_tmp` is always bound (None when the dataset is missing), so later
# cells can test it instead of failing with a NameError.
found = key in ws.datasets.keys()
dataset_tmp = ws.datasets[key] if found else None

if not found:
    print("Register heart_failure_clinical_records_dataset.csv into Workspace")


In [None]:
# Get the workspace's default datastore.
datastore = ws.get_default_datastore()

# Try to reuse a previously prepared CSV from the datastore.
try:
    ds_prepared = TabularDatasetFactory.from_delimited_files(datastore.path("data/heartfailure_prepared.csv"))
except Exception as err:
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still
    # propagate, and report why the prepared file could not be loaded.
    print("heartfailure_prepared.csv is not available")
    print(f"  reason: {err}")
    ds_prepared = None

if ds_prepared is None:
    # The prepared file must be rebuilt from the registered raw dataset; fail
    # with a clear message when that dataset was not found by the lookup cell.
    if not found:
        raise RuntimeError(
            f"Dataset '{key}' is not registered in the workspace; register "
            "heart_failure_clinical_records_dataset.csv before preparing the data."
        )

    # Use the clean_data function to clean your data.
    x, y = clean_data(dataset_tmp)

    # Re-attach the label so features and target travel in a single table.
    x["DEATH_EVENT"] = y

    # Write the prepared table locally, then upload the ./data folder to the datastore.
    os.makedirs(os.path.join(".", "data"), exist_ok=True)
    x.to_csv(os.path.join(".", "data", "heartfailure_prepared.csv"), index=False)

    datastore.upload(os.path.join(".", "data"), target_path="data")

    ds_prepared = TabularDatasetFactory.from_delimited_files(datastore.path("data/heartfailure_prepared.csv"))

### create compute targets

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Cluster settings (max_nodes is reused by the AutoML settings).
cpu_cluster_name = "cpu-cluster-01vx"
vm_size = "Standard_DS3_v2"
min_nodes, max_nodes = 0, 6


try:
    # Attach to the cluster when it already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # No such cluster: provision one with the settings above.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=min_nodes,
        max_nodes=max_nodes,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print("Found existing cluster. use it")

# Wait for the cluster operation to finish, then print its detailed status.
compute_target.wait_for_completion(show_output=True)
print(compute_target.get_status().serialize())

## AutoML Configuration

TODO: Explain why you chose the automl settings and configuration you used below.

Our task is to build a binary classification model. 
The model's performance is measured by accuracy, which is used as the primary metric.
To reduce overfitting, 5-fold cross-validation is used. 
To allow the best model to be saved in ONNX format, `enable_onnx_compatible_models` is set to `True`.

In [None]:
# AutoML settings, expanded into the AutoMLConfig below.
automl_settings = dict(
    experiment_timeout_minutes=30,
    max_concurrent_iterations=max_nodes - 1,
    primary_metric='accuracy',
    n_cross_validations=5,
    enable_onnx_compatible_models=True,
)

# AutoML configuration: classification on the prepared dataset with
# DEATH_EVENT as the label, run on the compute cluster.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=ds_prepared,
    label_column_name="DEATH_EVENT",
    path=project_folder,
    featurization='auto',
    enable_early_stopping=True,
    debug_log="automl_errors.log",
    **automl_settings,
)

In [None]:
# Submit the AutoML configuration to the experiment; the returned run handle
# (`automl_run`) is what the following cells monitor and query.
automl_run = experiment.submit(automl_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

In the cell below, use the `RunDetails` widget to show the different experiments.

In [None]:
# Show the RunDetails widget for the submitted AutoML run.
RunDetails(automl_run).show()

In [None]:
# Block until the AutoML run finishes, streaming its output into the cell.
automl_run.wait_for_completion(show_output=True)

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [None]:
# Retrieve the best run and its model from the AutoML run;
# return_onnx_model=True requests the ONNX form of the model.
best_run, best_model = automl_run.get_output(return_onnx_model=True)

# Display the returned model as the cell output.
best_model  #best_model.steps

In [None]:
# Save the best model (retrieved in ONNX form above) to ./automl_model.onnx.
from azureml.automl.runtime.onnx_convert import OnnxConverter

OnnxConverter.save_onnx_model(best_model, file_path="./automl_model.onnx")

## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the AutoML run's model in the workspace so it can be deployed;
# the registered name reuses `key`.
model = automl_run.register_model(
    description="Binary classification model for Heart Failure prediction",
    model_name=key,
)

# Show the identifier of the registered model.
print(model.id)

### Local Testing

In [None]:
# Reuse the environment of the best run for the local test deployment.
local_env = best_run.get_environment()

In [None]:
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import LocalWebservice

# Local web service configuration, listening on port 6789.
deployment_config = LocalWebservice.deploy_configuration(port=6789)

# Scoring script plus the environment taken from the best run.
dummy_inference_config = InferenceConfig(
    entry_script="./score.py",
    source_directory="./source_dir",
    environment=local_env,
)

# Deploy the registered model locally; the service name is the lowercased key.
local_service = Model.deploy(
    workspace=ws,
    name=key.lower(),
    models=[model],
    deployment_config=deployment_config,
    inference_config=dummy_inference_config,
    overwrite=True,
)

# Block until the local service is up, showing progress in the cell.
local_service.wait_for_deployment(show_output=True)

In [None]:
import requests
import json

local_uri = local_service.scoring_uri

# Hit the local endpoint once before scoring; the response itself is not used.
requests.get("http://localhost:6789")

headers = {"Content-Type": "application/json"}

# Request body: a "data" list holding one record with the 12 feature values,
# all sent as strings.
data = {
    "data": [
        dict(
            age="65",
            anaemia="0",
            creatinine_phosphokinase="146",
            diabetes="0",
            ejection_fraction="20",
            high_blood_pressure="0",
            platelets="162000",
            serum_creatinine="1.3",
            serum_sodium="129",
            sex="1",
            smoking="1",
            time="7",
        ),
    ]
}

# Serialize the body, post it to the scoring URI and print the decoded reply.
input_data = json.dumps(data)
response = requests.post(local_uri, data=input_data, headers=headers)
print(response.json())

### Deployed environment

In [None]:
%%writefile conda_dependencies.yml

# Conda specification for the deployed scoring environment.
# NOTE(review): only the Python version is pinned; consider pinning the packages
# to the versions used for training — TODO confirm.
dependencies:
- python=3.6.2
- numpy
- pandas
- scikit-learn
- onnxruntime
- pip:
  - azureml-defaults

In [None]:
# Build the deployment environment from the conda specification file written by the previous cell.
sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')



In [None]:
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice



# Combine scoring script & environment in the inference configuration.
# The environment variable is `sklearn_env`, created from conda_dependencies.yml.
inference_config = InferenceConfig(environment=sklearn_env, source_directory='./source_dir', entry_script='./score.py')

# Set deployment configuration: key authentication and Application Insights enabled.
deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1, auth_enabled = True, enable_app_insights=True)

# Define the model, inference, & deployment configuration and web service name and location to deploy.
# Web service names may only contain lowercase letters, numbers and dashes, so
# the key is lowercased (same naming as the local test deployment).
service = Model.deploy(
    workspace = ws,
    name = key.lower(),
    models = [model],
    inference_config = inference_config,
    deployment_config = deployment_config)

# Block until the deployment finishes so that the following cells (keys,
# scoring URI, logs) operate on a ready service.
service.wait_for_deployment(show_output=True)

In [None]:
# Authentication keys belong to the deployed service, not to the deployment
# configuration object. They are assigned instead of displayed so the secrets
# are not stored in the notebook output.
primary_key, secondary_key = service.get_keys()

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Obtain an authentication header through interactive login.
# NOTE(review): `auth_header` is not referenced by any later cell in this
# notebook — confirm whether this cell is still needed.
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
import json

import requests

uri = service.scoring_uri

# The service was deployed with auth_enabled=True, so requests must carry one
# of the service's keys. (`key` is the dataset/model name, not a credential.)
service_key = service.get_keys()[0]
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {service_key}",
}

# Same payload shape as the local test request, because both services run the
# same ./source_dir/score.py entry script.
# NOTE(review): confirm the expected request schema against score.py.
data = {"data":
        [
            {
                "age" : "65",
                "anaemia" : "0",
                "creatinine_phosphokinase" : "146",
                "diabetes" : "0",
                "ejection_fraction" : "20",
                "high_blood_pressure" : "0",
                "platelets" : "162000",
                "serum_creatinine" : "1.3",
                "serum_sodium" : "129",
                "sex" : "1",
                "smoking" : "1",
                "time" : "7"
            },
        ]
        }
input_data = json.dumps(data)
response = requests.post(uri, data=input_data, headers=headers)


In [None]:
# Decode the JSON body of the scoring response and print it.
print(response.json())

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Print the logs so line breaks are rendered; a bare expression would display
# the escaped repr of the log string.
print(service.get_logs())

In [None]:
# Delete the web service first, then the registered model it was serving.
service.delete()
model.delete()

# Mark the interactive run opened with experiment.start_logging() as completed
# so it is not left open in the experiment.
run.complete()

# compute_target.delete()