# Automated ML


In [2]:
import pandas as pd
import numpy as np
import json
import joblib
import json
import requests
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import OneHotEncoder



In [3]:
from azureml.train.automl import AutoMLConfig
from azureml.core.run import Run
from azureml.widgets import RunDetails
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.webservice import Webservice
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig

In [4]:
ws = Workspace.from_config()
experiment_name = 'heart_failure_automl'
experiment = Experiment(ws, experiment_name)

run = experiment.start_logging()

In [9]:
amlcompute_cluster_name = "notebook134413" #Remember to change cluster name!

try:
    compute_target = ComputeTarget(workspace = ws, name = amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_confisguration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

#compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

Found existing cluster, use it.


## Dataset

### Overview
* Cardiovascular diseases are the number 1 cause of death globally, taking an estimated 17.9 million lives each year,  Heart failure is a common event caused by CVDs. This dataset contains 12 features that may contribute to the cause of heart failure. Such as age, diabetes, high_blood_pressure and anaemia. 

* The dataset consists of 299 training examples and 13 features, we aim to predict the feature DEATH_EVENT which may have the value 1 in case of death due to heart faulure and 0 in case of survival.
* The task we are performing here is **Classification** and I will use Azure's AutoML to find the model that best fits the data and has the highest accuracy. 

In [20]:
from azureml.core import Dataset
ds = Dataset.get_by_name(ws, name = "heart failure")
df = ds.to_pandas_dataframe()
df.head(3)


## AutoML Configuration

AutoML is a powerful tool that enables us to find the best model quickly. For my automl run I've used the following settings & configurations to find best combination of algorithms & hyperparameters:
* The primary metric is set to *accuracy*.
* The task is set to *classification* because we aim to get a binary result either 1 or 0, death or no death.
* We use the traing data we got from the dataset & we define the target column.
* Logs have been generated for debugging reasons. 
* Auto featurization is enabled, featurization includes automated feature engineering and scaling and normalization, which then impacts the selected algorithm and its hyperparameter values.
* Early stopping is enabled to save computational power. 
* Number of cross validation is set to 3.



In [21]:
# automl settings
automl_settings = {
    "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'accuracy'}

# automl config
automl_config = AutoMLConfig(compute_target = compute_target,
                             task = "classification",
                             training_data=ds,
                             label_column_name="DEATH_EVENT",   
                             featurization= 'auto',
                             n_cross_validations = 3,
                            enable_early_stopping= True,
                             debug_log = "automl_logs.log",
                             **automl_settings)

In [22]:
# Submitting the experiment
remote_run = experiment.submit(automl_config, show_output=True)

Running on remote.
No run_configuration provided, running on notebook134413 with default configuration
Running on remote compute: notebook134413
Parent Run ID: AutoML_a0ce9e98-d212-464a-84ef-994c229fed02

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputa

## Run Details

In [23]:
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more abo

{'runId': 'AutoML_a0ce9e98-d212-464a-84ef-994c229fed02',
 'target': 'notebook134413',
 'status': 'Completed',
 'startTimeUtc': '2021-01-12T01:47:25.930264Z',
 'endTimeUtc': '2021-01-12T02:03:15.238864Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '3',
  'target': 'notebook134413',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"6027b021-c47e-4741-bb9e-fde65927418d\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"UI/01-12-2021_014544_UTC/heart_failure.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-134413\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"6b4af8be-9931-443e-90f6-c4c

## Best Model

In [26]:
best_run, model = remote_run.get_output()

#Printing the best run
print(model) 
print('\nBest Run Id: ', best_run.id)

#Printing the metric details of the best run
best_run_metrics = best_run.get_metrics()
for metric_name in best_run_metrics:
    metric = best_run_metrics[metric_name]
    print(metric_name, metric)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                        class_weight='balanced',
                                                                                        coef0=0.0,
                                                                                        decision_function_shape='ovr',
                                                             

In [27]:
#Saving the best model
joblib.dump(model,'outputs/automl_model.pkl')

['outputs/automl_model.pkl']

## Model Deployment

* The AutoML model outperformed the model tuned by HyperDrive so we'll start deploying the best automl model. 

In [28]:
from azureml.core.model import Model
model = Model.register(workspace = ws,
                        model_path ="outputs/automl_model.pkl",
                        model_name = "automl_model")

Registering model automl_model


In [36]:
%%writefile score_automl.py
from azureml.core.model import Model
import numpy as np
import pandas as pd
import joblib
import json
import pickle
import os

def init():
    global model
    model_path = Model.get_model_path("automl_model")
    model = joblib.load(model_path)

def run(raw_data):
    try:
        data = json.loads(raw_data)['data']
        data = pd.DataFrame.from_dict(data)
        result = model.predict(data)
        return result.tolist()
    
    except Exception as ex:
        error = str(ex)
        return error

Overwriting score_automl.py


In [37]:
env = Environment.get(workspace=ws, name = "AzureML-AutoML")

In [43]:
from azureml.core.model import Model
from azureml.core.webservice import Webservice 
from azureml.core.webservice import  AciWebservice
from azureml.core.conda_dependencies import CondaDependencies
config_aci = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               enable_app_insights=True, 
                                               auth_enabled=True)



ws = Workspace.from_config()
model = Model(ws, 'automl_model')

inference_config = InferenceConfig(entry_script="score_automl.py", environment=env)

service_name = 'automl'
service = Model.deploy(workspace=ws, 
                       name=service_name, 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config= config_aci)

service.wait_for_deployment(show_output=True)

print(service.state)
print(service.get_logs())

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...............................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
2021-01-12T02:42:41,749053818+00:00 - iot-server/run 
2021-01-12T02:42:41,749196419+00:00 - rsyslog/run 
2021-01-12T02:42:41,749377619+00:00 - gunicorn/run 
2021-01-12T02:42:41,792004712+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr

In [48]:
print(service.get_logs())

2021-01-12T02:42:41,749053818+00:00 - iot-server/run 
2021-01-12T02:42:41,749196419+00:00 - rsyslog/run 
2021-01-12T02:42:41,749377619+00:00 - gunicorn/run 
2021-01-12T02:42:41,792004712+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [44]:
print("scoring URI: " + service.scoring_uri)

print("Swagger URI: " + service.swagger_uri)

print("Authetication Key: " + service.get_keys()[0])

scoring URI: http://a6a8b8cd-cc95-4757-a55e-dc487d0a0070.southcentralus.azurecontainer.io/score
Swagger URI: http://a6a8b8cd-cc95-4757-a55e-dc487d0a0070.southcentralus.azurecontainer.io/swagger.json
Authetication Key: DheElutv21WKWW1C9VYbr2l6wzcLkVy4


In [46]:
primary_key, secondary_key = service.get_keys()
print(primary_key,'\n',secondary_key)

DheElutv21WKWW1C9VYbr2l6wzcLkVy4 
 DXCVaeM79fZU2oFeO6AZbbTjDzKtpQ9K


TODO: In the cell below, send a request to the web service you deployed to test it.

In [47]:
key = primary_key
scoringuri = service.scoring_uri

data= { "data":
       [
           {
               'age': 60,
               'anaemia': 245,
               'creatinine_phosphokinase': 0,
               'diabetes': 0,
               'ejection_fraction': 38,
               'high_blood_pressure': 1,
               'platelets': 163000,
               'serum_creatinine': 50,
               'serum_sodium':100,
               'sex':1,
               'smoking':1,
               'time':7
               
               
           }
       ]
    }
input_data = json.dumps(data)

headers = {'Content-Type': 'application/json'}
headers['Authorization'] = f'Bearer {key}'

response = requests.post(scoringuri, input_data, headers = headers)
print(response.text)

[1]


TODO: In the cell below, print the logs of the web service and delete the service

In [49]:
ws = Workspace.from_config()

service = Webservice(name="aml-service", workspace=ws)
service.update(enable_app_insights=True)
logs = service.get_logs()
print(logs)

2021-01-12T02:44:58,371356973+00:00 - iot-server/run 
2021-01-12T02:44:58,371770998+00:00 - gunicorn/run 
2021-01-12T02:44:58,372986871+00:00 - rsyslog/run 
2021-01-12T02:44:58,377098918+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [50]:
#Deleting the service
service.delete()
print(service)

No service with name aml-service found to delete.
AciWebservice(workspace=Workspace.create(name='quick-starts-ws-134413', subscription_id='6b4af8be-9931-443e-90f6-c4c34a1f9737', resource_group='aml-quickstarts-134413'), name=aml-service, image_id=None, compute_type=None, state=ACI, scoring_uri=Failed, tags=None, properties={}, created_by={'hasInferenceSchema': 'False', 'hasHttps': 'False'})


In [51]:
#Deleting the compute cluster once the training process is complete
compute_target.delete()