# Automated ML

Import Dependencies. 

In [None]:
!pip install xgboost==0.90

In [None]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

from azureml.pipeline.steps import AutoMLStep

# Print the installed Azure ML core SDK version so the environment used for
# this run is visible in the notebook output.
print("SDK version:", azureml.core.VERSION)

In [None]:
# Load the Azure ML workspace via Workspace.from_config(), then echo its
# identifying details, one per line.
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)


Create Compute Cluster : Check if a compute cluster exists. Create a new cluster if a cluster doesn't exist. 

In [None]:
# Reuse the named compute cluster when the workspace already has it;
# otherwise provision a new one and block until it is ready.
amlcompute_cluster_name = "cluster-capstone"
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # The lookup failed, so create the cluster.
    # For GPU, use "STANDARD_NC6" as the vm_size; vm_priority='lowpriority'
    # is an optional extra setting.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=1,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

    # Wait (up to 10 minutes) for at least one node to come up.
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=1,
        timeout_in_minutes=10,
    )
else:
    print('Found existing cluster, use it.')

## Dataset

### Overview
Cardiovascular diseases (CVDs) are the number 1 cause of death globally, taking an estimated 17.9 million lives each year, which accounts for 31% of all deaths worldwide. Heart failure is a common event caused by CVDs, and this dataset contains 12 features that can be used to predict mortality by heart failure. Most cardiovascular diseases can be prevented by addressing behavioural risk factors such as tobacco use, unhealthy diet and obesity, physical inactivity and harmful use of alcohol using population-wide strategies.

People with cardiovascular disease or who are at high cardiovascular risk (due to the presence of one or more risk factors such as hypertension, diabetes, hyperlipidaemia or already established disease) need early detection and management wherein a machine learning model can be of great help.

This Heart Failure Prediction dataset is downloaded from Kaggle. Death due to heart failure is predicted using information such as anaemia, diabetes, high blood pressure, platelets, serum_creatinine, serum_sodium, creatinine_phosphokinase and ejection_fraction (% of blood leaving the heart at each contraction).

In [None]:
# Name and description under which the dataset is registered in the workspace.
key = "HeartFailure Data"
description_text = "HeartFailure Data from Kaggle"

# Reuse the registered dataset when the workspace already has one under `key`;
# otherwise upload the local CSV and register it.
found = key in ws.datasets
if found:
    dataset = ws.datasets[key]
else:
    # get the datastore to upload prepared data
    datastore = ws.get_default_datastore()

    # upload the local files from src_dir to the target_path in the datastore
    datastore.upload(src_dir='data', target_path='data')

    # create a dataset referencing the cloud location
    dataset = Dataset.Tabular.from_delimited_files(
        path=[(datastore, 'data/heart_failure_clinical_records_dataset.csv')]
    )

    # Register under the same `key` that the lookup above uses, so that the
    # lookup and the registration cannot drift apart and a re-run of this
    # cell finds the dataset instead of uploading it again.
    dataset = dataset.register(workspace=ws, name=key, description=description_text)

# Materialize the dataset as a pandas DataFrame and show summary statistics
# (the bare expression is displayed as the cell output).
df = dataset.to_pandas_dataframe()
df.describe()

## AutoML Configuration

Azure Machine Learning supports automatic training and comparison of machine learning models using AutoML. To run AutoML, an AutoMLConfig object needs to be created with parameters such as the dataset, training cluster, machine learning task, target column and the metric used to evaluate the algorithm.

In [None]:
# AutoML run settings.
automl_settings = {
    # Stop the whole experiment after at most two hours.
    "experiment_timeout_hours": 2,
    # AmlCompute runs one AutoML iteration per node, so keep concurrency at or
    # below the cluster's node count. The "cluster-capstone" provisioning
    # configuration in this notebook uses max_nodes=4; a higher value only
    # queues iterations until a node frees up.
    "max_concurrent_iterations": 4,
    # Metric used to rank the candidate models.
    "primary_metric": 'AUC_weighted',
}

# AutoML configuration: classify the DEATH_EVENT column of the registered
# dataset on the remote compute cluster, with automatic featurization and
# early stopping enabled. Errors are written to automl_errors.log.
automl_config = AutoMLConfig(
    compute_target=compute_target,
    task="classification",
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    enable_early_stopping=True,
    featurization='auto',
    debug_log="automl_errors.log",
    **automl_settings,
)

In [None]:
# Name the experiment, bind it to the workspace, and submit the AutoML
# configuration as a new run.
experiment_name = 'heart-failure-prediction-automl'
experiment = Experiment(workspace=ws, name=experiment_name)
remote_run = experiment.submit(config=automl_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

In the cell below, use the `RunDetails` widget to show the different experiments.

In [None]:
from azureml.widgets import RunDetails

# Build the run-details widget for the submitted AutoML run and render it.
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

In [None]:
# Block until the AutoML run finishes, printing its output to the cell.
remote_run.wait_for_completion(show_output=True)

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



The AutoML fitted model can be obtained using the get_output() method of the AutoML run object. The fitted model describes the steps used in the model, the data transformations applied and the hyperparameters used in the model

In [None]:
# Fetch the best child run together with its fitted model and print both.
best_run, fitted_model = remote_run.get_output()
print(best_run)
print(fitted_model)

# Print every metric recorded for the best run as "name value".
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)



In [None]:
# Register the best run's pickled model in the workspace, tagging it as an
# AutoML model and recording its weighted AUC and accuracy as properties.
model_properties = {
    'AUC': best_run_metrics['AUC_weighted'],
    'Accuracy': best_run_metrics['accuracy'],
}
model = best_run.register_model(
    model_name='automal_heartfailure_prediction_model',
    model_path='outputs/model.pkl',
    tags={'Training context': 'Auto ML'},
    properties=model_properties,
)


In [None]:
# Print the registered model's name, id and version, separated by tabs.
print(model.name, model.id, model.version, sep='\t')

## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Local filename under which the scoring script from the best run is saved.
script_file = 'score.py'

# Take the inference environment from the best run, and download the run's
# scoring script to `script_file`.
env = best_run.get_environment()
best_run.download_file(
    name='outputs/scoring_file_v_1_0_0.py',
    output_file_path=script_file,
)

Deploy the model to Azure Container Instances (ACI), and get the scoring URI from the deployed service.

In [None]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

# Pair the scoring script with the environment taken from the best run.
inference_config = InferenceConfig(entry_script=script_file, environment=env)

# ACI deployment configuration: 1 CPU core and 1 GB of memory.
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the registered model as a web service and wait for it to come up.
deploy_service_name = 'automl-model-deployment'
service = Model.deploy(
    workspace=ws,
    name=deploy_service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)

# Report the service state and the endpoint used for scoring requests.
scoring_uri = service.scoring_uri
print("State: ", service.state)
print("\nScoring URI: ", scoring_uri)

Enable logging on the service

In [None]:

# Enable Application Insights on the already-deployed service.
service.update(enable_app_insights=True)

Test the scoring service using two samples

In [None]:
import requests
import json

# URL for the web service. `scoring_uri` is taken from the deployed service
# earlier in this notebook; to target another deployment, override it with a
# URL of this form:
#scoring_uri = 'http://<service-id>.<region>.azurecontainer.io/score'
# If the service is authenticated, set the key or token. Do not commit a real
# key to the notebook -- load it from an environment variable or secret store.
#key = '<your-service-key>'
# Two records to score, so we get two results back. Dataset columns:
#age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
# (DEATH_EVENT is the label and is therefore not part of the request.)

data = {"data": [
    {"age": 75,
     "anaemia": 0,
     "creatinine_phosphokinase": 582,
     "diabetes": 0,
     "ejection_fraction": 20,
     "high_blood_pressure": 1,
     "platelets": 265000,
     "serum_creatinine": 1.9,
     "serum_sodium": 130,
     "sex": 1,
     "smoking": 0,
     "time": 4},

    {"age": 55,
     "anaemia": 0,
     "creatinine_phosphokinase": 7861,
     "diabetes": 0,
     "ejection_fraction": 38,
     "high_blood_pressure": 0,
     "platelets": 263358.03,
     "serum_creatinine": 1.1,
     "serum_sodium": 136,
     "sex": 1,
     "smoking": 0,
     "time": 6},
      ]}

# Convert to a JSON string and keep a copy on disk as data.json.
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
#headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response. The timeout keeps the cell from
# hanging forever on an unresponsive endpoint, and raise_for_status() surfaces
# HTTP errors directly instead of a confusing JSON decoding failure.
resp = requests.post(scoring_uri, input_data, headers=headers, timeout=60)
resp.raise_for_status()
print(resp.json())
# NOTE(review): presumably the expected outcome for the two samples above,
# not something derived from the response -- confirm.
print("Case 0: Death, Case 1: Death.")

Test the model using two samples

In [None]:
# Two sample patient records for scoring, written as one tuple of feature
# names plus one tuple of values per patient, then zipped into dicts.
feature_names = (
    "age",
    "anaemia",
    "creatinine_phosphokinase",
    "diabetes",
    "ejection_fraction",
    "high_blood_pressure",
    "platelets",
    "serum_creatinine",
    "serum_sodium",
    "sex",
    "smoking",
    "time",
)
sample_rows = (
    (65, 0, 146, 0, 20, 0, 162000, 1.3, 129, 1, 1, 7),
    (55, 0, 109, 0, 35, 0, 254000, 1.1, 139, 1, 1, 60),
)
data = [dict(zip(feature_names, row)) for row in sample_rows]

print(data)


In [None]:
# Score the sample records through the SDK service object: wrap them under
# the 'data' key, serialize to JSON, and call the service.
payload = {'data': data}
input_data = json.dumps(payload)

output = service.run(input_data)
print(output)
print("Case 0: Death, Case 1: No Death.")

In [None]:

# Fetch the deployed service's logs; the bare name on the last line makes the
# notebook display them as the cell output.
logs = service.get_logs()
logs

In [None]:
# Delete the deployed web service now that testing is finished.
service.delete()