# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import os
import shutil

from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig
from azureml.core import Environment
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive.parameter_expressions import choice

In [2]:
ws = Workspace.from_config()

experiment_name = 'heartfailure_hyperdrive'

experiment=Experiment(ws, experiment_name)

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = experiment.start_logging()



Workspace name: quick-starts-ws-135475
Azure region: southcentralus
Subscription id: 48a74bb7-9950-4cc1-9caa-5d50f995cc55
Resource group: aml-quickstarts-135475


## Dataset

### Overview
Cardiovascular diseases (CVDs) are the number 1 cause of death globally, taking an estimated 17.9 million lives each year, which accounts for 31% of all deaths worlwide.
Heart failure is a common event caused by CVDs and this dataset contains 12 features that can be used to predict mortality by heart failure.

The dataset was downloaded from [kaggle](https://www.kaggle.com/andrewmvd/heart-failure-clinical-data) and a copy from the dataset was saved into the [git repository](https://github.com/heber-augusto/Nanodegree_Azure_ML_Engineer_CapstoneProject/tree/master/data).


In [3]:
# Try to load the dataset from the Workspace. Otherwise, create it from the file
found = False
key = "heart-failure-prediction"
description_text = "Heart Failure Prediction dataset"
label = "DEATH_EVENT"
if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 

if not found:
        # Create AML Dataset and register it into Workspace
        example_data = 'https://raw.githubusercontent.com/heber-augusto/Nanodegree_Azure_ML_Engineer_CapstoneProject/master/data/heart_failure_clinical_records_dataset.csv'
        dataset = Dataset.Tabular.from_delimited_files(example_data)
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=key,
                                   description=description_text)


df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


In [4]:
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "cpu-cluster-hd"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Creating a new compute target...
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-01-20T12:26:20.285000+00:00', 'errors': None, 'creationTime': '2021-01-20T12:26:14.195746+00:00', 'modifiedTime': '2021-01-20T12:26:30.228694+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [8]:
project_folder = './heart-failure-hyperdrive'
os.makedirs(project_folder, exist_ok=True)

In [52]:
shutil.copy('train.py', project_folder)

'./heart-failure-hyperdrive/train.py'

### Create an environment

Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [9]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.2
- scikit-learn
- pip:
  - azureml-defaults

Writing conda_dependencies.yml


In [10]:
sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')

### Configure the training job

Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on.

In [11]:
src = ScriptRunConfig(source_directory=project_folder,
                      script='train.py',
                      arguments=['--C', 1, '--max_iter', 100, '--solver', 'lbfgs'],
                      compute_target=compute_target,
                      environment=sklearn_env)

### Submit job

Run your experiment by submitting your ScriptRunConfig object. Note that this call is asynchronous.

In [53]:
run = experiment.submit(src)

### Monitor your run

You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes.

In [54]:
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [55]:
run.wait_for_completion(show_output=True)

RunId: heartfailure_hyperdrive_1611152920_1a59b2fb
Web View: https://ml.azure.com/experiments/heartfailure_hyperdrive/runs/heartfailure_hyperdrive_1611152920_1a59b2fb?wsid=/subscriptions/48a74bb7-9950-4cc1-9caa-5d50f995cc55/resourcegroups/aml-quickstarts-135475/workspaces/quick-starts-ws-135475

Streaming azureml-logs/55_azureml-execution-tvmps_877fadb38bc33d5c69498b27bed4bceab1681278b7b1887b38c6d1529d3b07dc_d.txt

2021-01-20T14:32:55Z Starting output-watcher...
2021-01-20T14:32:55Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2021-01-20T14:32:56Z Executing 'Copy ACR Details file' on 10.0.0.5
2021-01-20T14:32:56Z Copy ACR Details file succeeded on 10.0.0.5. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_b1e1549112fc4b4d2d32f3d6c4b8a2b3
8e097b52bfb8: Pulling fs layer
a613a9b4553c: Pulling fs layer
acc000f01536: Pulling fs layer
73eef93b7466: Pulling fs layer
d5a54c1fb97f: Pulling fs layer
1536f6ca931b: Pulling fs la

{'runId': 'heartfailure_hyperdrive_1611152920_1a59b2fb',
 'target': 'cpu-cluster-hd',
 'status': 'Completed',
 'startTimeUtc': '2021-01-20T14:32:50.017162Z',
 'endTimeUtc': '2021-01-20T14:35:14.754421Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '3a6b41b9-e905-4deb-aaae-8891436ff811',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--C', '1', '--max_iter', '100', '--solver', 'lbfgs'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'cpu-cluster-hd',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'priority': None,
  'credentialPassthrough': False,
  'environment': {'name': 'sklearn-env',
   'version':

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.


In [58]:
# Specify a Policy
# The BanditPolicy basically states to check the job every 2 iterations. 
# If the primary metric (defined later) falls outside of the top 10% range, Azure ML 
# terminate the job. This saves us from continuing to explore hyperparameters that don't 
# show promise of helping reach our target metric.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Specify parameter sampler
param_sampling = RandomParameterSampling( {
    "--C": choice(0.01, 0.1, 1, 10, 100),
    "--max_iter": choice(100, 200, 500),
    "--solver": choice('newton-cg', 'lbfgs', 'liblinear'),    
    }
)

# Create a SKLearn estimator for use with train.py
# estimator = ### YOUR CODE HERE ###
# I decided not to use the estimator because the documentation (https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.sklearn.sklearn) says that it is deprecated

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_run_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4)





In [59]:
# Submit hyperdrive run to the experiment and show run details with the widget.

# start the HyperDrive run
hyperdrive_run = experiment.submit(hyperdrive_run_config)

## Run Details

`RunDetails` widget to show the different experiments.

In [60]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [61]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2
Web View: https://ml.azure.com/experiments/heartfailure_hyperdrive/runs/HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2?wsid=/subscriptions/48a74bb7-9950-4cc1-9caa-5d50f995cc55/resourcegroups/aml-quickstarts-135475/workspaces/quick-starts-ws-135475

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-20T14:40:07.681837][API][INFO]Experiment created<END>\n""<START>[2021-01-20T14:40:08.221952][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-01-20T14:40:08.502741][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-20T14:40:09.3559137Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2
Web View: https://ml.azure.com/experiments/heartfailure_hyperdrive/runs/HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2?wsid=/sub

{'runId': 'HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2',
 'target': 'cpu-cluster-hd',
 'status': 'Completed',
 'startTimeUtc': '2021-01-20T14:40:06.91243Z',
 'endTimeUtc': '2021-01-20T14:54:20.76421Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '1e6cf447-0577-459e-855a-5c2f918d35a0',
  'score': '0.8181818181818182',
  'best_child_run_id': 'HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg135475.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_9f7ed0b2-a1f6-40fc-ab44-6ec1d7dc88a2/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=clmuOXQdohcVV42PxRkHzOenrFjS7T5dNulAFsOOc0c%3D&st=2021-01-20T14%3A44%3A29Z&se=2021-01-20T22%3A54%3A29Z&sp=r'}}

In [62]:
assert(hyperdrive_run.get_status() == "Completed")

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [63]:
import joblib
from sklearn.linear_model import LogisticRegression
from azureml.data.dataset_factory import TabularDatasetFactory
from train import clean_data
from sklearn.model_selection import train_test_split

# Create TabularDataset using TabularDatasetFactory
dataset_path = 'https://raw.githubusercontent.com/heber-augusto/Nanodegree_Azure_ML_Engineer_CapstoneProject/master/data/heart_failure_clinical_records_dataset.csv'
ds = TabularDatasetFactory.from_delimited_files(path = dataset_path)

x, y = clean_data(ds)

# Split data into train and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y , test_size=0.33, random_state=42)

best_run = hyperdrive_run.get_best_run_by_primary_metric()
arguments = best_run.get_details()['runDefinition']['arguments']
print(arguments)
print(best_run.get_file_names())

model = LogisticRegression(
    C=int(arguments[1]), 
    max_iter=int(arguments[3]), 
    solver=arguments[5])
model.fit(x_train, y_train)
filename = 'best_model.sav'

joblib.dump(model, filename)

['--C', '1', '--max_iter', '100', '--solver', 'lbfgs', '--C', '0.01', '--max_iter', '100', '--solver', 'newton-cg']
['azureml-logs/55_azureml-execution-tvmps_39de84516b479523e2dbbf9df1966e4d5a2320a221c471bcaa7cf22578ede273_d.txt', 'azureml-logs/65_job_prep-tvmps_39de84516b479523e2dbbf9df1966e4d5a2320a221c471bcaa7cf22578ede273_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_39de84516b479523e2dbbf9df1966e4d5a2320a221c471bcaa7cf22578ede273_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/100_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_l_d7c7af99-04b0-4f23-aa85-d2724c28becf.jsonl', 'logs/azureml/dataprep/python_span_l_d7c7af99-04b0-4f23-aa85-d2724c28becf.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']


['best_model.sav']

In [69]:

from sklearn.metrics import confusion_matrix
import pandas as pd
ypred = model.predict(x_test)
cm = confusion_matrix(y_test, ypred)
# Visualize the confusion matrix
pd.DataFrame(cm).style.background_gradient(cmap='Blues', low=0, high=0.9)

Unnamed: 0,0,1
0,54,3
1,20,22


In [None]:
#TODO: Save the best model

## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
from azureml.core import Workspace
from azureml.core.webservice import Webservice

# Requires the config to be downloaded first to the current working directory
ws = Workspace.from_config()

# Set with the deployment name
name = "deployed-best-model"

# load existing web service
service = Webservice(name=name, workspace=ws)

# enable app insights
service.update(enable_app_insights=True)


logs = service.get_logs()

for line in logs.split('\n'):
    print(line)

TODO: In the cell below, send a request to the web service you deployed to test it.

TODO: In the cell below, print the logs of the web service and delete the service