# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.sklearn import SKLearn

import os

## Cancel existing Runs

In [2]:
# Connect to the workspace described by the local config file and echo its identity
ws = Workspace.from_config()
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

experiment_name = 'capstone-heart-failure-prediction'
experiment = Experiment(ws, experiment_name)

# List every run of the experiment and cancel the ones still reported as "Running",
# so that they do not compete with the new sweep for cluster nodes.
for run in experiment.get_runs():
    print(run.id)
    if run.status != "Running":
        continue
    run.cancel()

ws_udacity_capstone_v2
UdacityMLAzureCapstoneV2
eastus2
ca1598e0-85dc-47d5-b06d-41b5342b4989


## Create New Cluster / Use Existing Cluster

In [3]:
# Name of the compute cluster used for training
cpu_cluster_name = "CapstoneV1"

# Reuse the cluster when the workspace already has one with this name;
# otherwise provision a new one.
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    print('Creating a new compute cluster...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2',
        min_nodes=1,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until provisioning finishes. No minimum node count or timeout is passed,
# so the cluster's own scale settings apply.
compute_target.wait_for_completion(show_output=True)

# Show the detailed status of the cluster that will be used
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing cluster, use it.

Running
{'errors': [], 'creationTime': '2021-04-25T18:53:25.316093+00:00', 'createdBy': {'userObjectId': '2f5770ca-7bf7-4ecc-bd4b-14652b1bbf0c', 'userTenantId': '3814e260-63cb-43a8-82ce-e862c309e004', 'userName': 'Abhi Ojha'}, 'modifiedTime': '2021-04-25T18:55:27.349216+00:00', 'state': 'Running', 'vmSize': 'STANDARD_DS12_V2'}


## Dataset

The dataset used in this project is taken from [Kaggle](https://www.kaggle.com/andrewmvd/heart-failure-clinical-data). It consists of 12 distinct features and 1 target, as summarized below:
- **Input features** - Age, Anaemia, Creatinine-phosphokinase, Diabetes, Ejection_fraction, High_blood_pressure, Platelets, Serum_creatinine, Serum_sodium, Sex, Smoking, Time
- **Target** - DEATH_EVENT

We will use this dataset for creating a model to predict mortality caused by heart failure.



In [4]:
# Relative path of the CSV. Because src_dir and target_path below are both 'data',
# the same relative path identifies the file locally and inside the datastore.
local_data_path = 'data/heart_failure_clinical_records_dataset.csv'

# get the workspace's default datastore to upload the prepared data
datastore = ws.get_default_datastore()

# upload the local files from src_dir to the target_path in the datastore.
# overwrite=True makes a re-run replace the stored copy; with the default
# (overwrite=False) an already-uploaded file is left untouched, so a changed
# local CSV would not reach the dataset version registered below.
datastore.upload(src_dir='data', target_path='data', overwrite=True)

# create a tabular dataset referencing the uploaded file in the datastore
heart_failure_ds = Dataset.Tabular.from_delimited_files(path=[(datastore, local_data_path)])

# register the dataset in the workspace (a new version is created on every run)
heart_failure_ds = heart_failure_ds.register(workspace=ws, name='heart_failure_ds', create_new_version=True)

Uploading an estimated of 1 files
Uploading data/heart_failure_clinical_records_dataset.csv
Uploaded data/heart_failure_clinical_records_dataset.csv, 1 files out of an estimated total of 1
Uploaded 1 files


## Hyperdrive Configuration

I am using sklearn's Logistic Regression model as it is a good beginner model for classification tasks. Since this is a binary classification problem with a moderate number of input features, I think a well-tuned Logistic Regression would do a decent job.

**Parameter Sampler**

The hyperparameter sampler is defined below:

```python
ps = RandomParameterSampling(
    {
        '--C' : choice(0.01,0.1,1,10,100),
        '--max_iter': choice(50,100)
    }
)
```

Here, `C` is the inverse of the regularization strength (smaller values mean stronger regularization), and `max_iter` defines the maximum number of iterations the solver is allowed to run. Some options available in the Azure sampling library are `RandomParameterSampling`, `GridParameterSampling`, `BayesianParameterSampling`, etc. Out of these, I used `RandomParameterSampling` as it is fast and supports early termination for low-performance runs.

**Early Stopping Policy**

I used the BanditPolicy for early stopping, as below:

```python
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)
```

Here, `evaluation_interval` is an optional argument that represents the frequency for applying the policy. The `slack_factor` argument defines the amount of slack allowed with respect to the best performing training run.

### Hyperdrive config parameters 
- *primary_metric_name*: This is the primary metric reported by experiment runs. I have set it to `Accuracy`.
- *primary_metric_goal*: Since we want to get a high accuracy value, this is set to `MAXIMIZE` using `PrimaryMetricGoal.MAXIMIZE`.
- *policy*: This is the `BanditPolicy` defined above.
- *estimator*: Azure ML Estimators make it easy to train models based on popular machine learning frameworks. There are estimator classes for Scikit-learn, PyTorch, TensorFlow, etc. In this example we are using SKLearn estimator which further uses `train.py` as the entry script.
- *max_total_runs*: This is the upper bound on the total number of runs for this experiment. I have set this value to 20, which means that at most 20 runs will be created.
- *max_concurrent_runs*: The maximum number of runs that can be executed concurrently. Its value is set to 10.

In [5]:
# Early termination policy: applied every 2 evaluation intervals, with a slack
# of 0.1 allowed relative to the best performing training run.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Random sampling over a discrete search space passed to train.py as
# command-line arguments: 5 values of C x 2 values of max_iter = 10 combinations.
ps = RandomParameterSampling(
    {
        '--C' : choice(0.01,0.1,1,10,100),
        '--max_iter': choice(50,100)
    }
)

# Ensure the ./training folder exists. exist_ok=True replaces the separate
# "list directory, then mkdir" check, so the cell can be re-run safely.
os.makedirs("./training", exist_ok=True)

# Create a SKLearn estimator that runs train.py from the current directory
# on the compute cluster selected earlier in the notebook.
est = SKLearn(source_directory = "./",
            compute_target=compute_target,
            vm_size='STANDARD_DS3_V2',
            entry_script="train.py")

# HyperDrive sweep: maximize the 'Accuracy' metric reported by the child runs.
# NOTE(review): max_total_runs (20) is larger than the 10 combinations in the
# search space above, and max_concurrent_runs (10) is larger than the
# max_nodes=4 requested when this notebook creates the cluster — confirm both
# values are intended.
hyperdrive_run_config = HyperDriveConfig(hyperparameter_sampling=ps, 
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=early_termination_policy,
                                     estimator=est,
                                     max_total_runs=20,
                                     max_concurrent_runs=10)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


## Run Details

In [6]:
# Submit the HyperDrive configuration to the experiment; this launches the sweep
hyperdrive_run = experiment.submit(hyperdrive_run_config)

# Display the monitoring widget for the HyperDrive run
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [7]:
# Block until the HyperDrive run has finished, streaming its logs to the output
hyperdrive_run.wait_for_completion(show_output=True)

# Fail fast here if the sweep did not end in the "Completed" state
assert hyperdrive_run.get_status() == "Completed"

RunId: HD_723dea94-8708-4a40-8457-0f576dc1e5f1
Web View: https://ml.azure.com/runs/HD_723dea94-8708-4a40-8457-0f576dc1e5f1?wsid=/subscriptions/ca1598e0-85dc-47d5-b06d-41b5342b4989/resourcegroups/UdacityMLAzureCapstoneV2/workspaces/ws_udacity_capstone_v2&tid=3814e260-63cb-43a8-82ce-e862c309e004

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-25T18:59:21.317288][GENERATOR][INFO]Trying to sample '10' jobs from the hyperparameter space<END>\n""<START>[2021-04-25T18:59:21.667950][GENERATOR][INFO]Successfully sampled '10' jobs, they will soon be submitted to the execution target.<END>\n""<START>[2021-04-25T18:59:20.620287][API][INFO]Experiment created<END>\n"

Execution Summary
RunId: HD_723dea94-8708-4a40-8457-0f576dc1e5f1
Web View: https://ml.azure.com/runs/HD_723dea94-8708-4a40-8457-0f576dc1e5f1?wsid=/subscriptions/ca1598e0-85dc-47d5-b06d-41b5342b4989/resourcegroups/UdacityMLAzureCapstoneV2/workspaces/ws_udacity_capstone_v2&tid=3814e260-63cb-43a8-82ce-e862c309e004



In [11]:
# Fetch the hyperparameters of all the child runs, keyed by child run id, and show them
child_hyperparameters = hyperdrive_run.get_hyperparameters()
print(child_hyperparameters)

{'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_0': '{"--C": 100, "--max_iter": 100}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_1': '{"--C": 10, "--max_iter": 100}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_2': '{"--C": 1, "--max_iter": 50}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_3': '{"--C": 0.01, "--max_iter": 50}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_4': '{"--C": 0.1, "--max_iter": 50}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_5': '{"--C": 10, "--max_iter": 50}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_6': '{"--C": 100, "--max_iter": 50}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_7': '{"--C": 0.01, "--max_iter": 100}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_8': '{"--C": 0.1, "--max_iter": 100}', 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_9': '{"--C": 1, "--max_iter": 100}'}


## Best Model

In [10]:
import joblib

# Pick the child run with the best value of the primary metric
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# Print the metrics, output file names and details of the best run, in that order.
# Each getter is called right before its own print, one after the other.
for label, fetch in (
    ("Best run metrics :", best_run.get_metrics),
    ("Best run file names :", best_run.get_file_names),
    ("Best run details:", best_run.get_details),
):
    print(label, fetch())

Best run metrics : {'Regularization Strength:': 100.0, 'Max iterations:': 100, 'Accuracy': 0.8333333333333334}
Best run file names : ['azureml-logs/20_image_build_log.txt', 'azureml-logs/55_azureml-execution-tvmps_66b718f5e1843639b118568f7e6aebb530694b3f854c7329641c60edb0a4116e_d.txt', 'azureml-logs/65_job_prep-tvmps_66b718f5e1843639b118568f7e6aebb530694b3f854c7329641c60edb0a4116e_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_66b718f5e1843639b118568f7e6aebb530694b3f854c7329641c60edb0a4116e_d.txt', 'logs/azureml/103_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.pkl']
Best run details: {'runId': 'HD_723dea94-8708-4a40-8457-0f576dc1e5f1_0', 'target': 'CapstoneV1', 'status': 'Completed', 'startTimeUtc': '2021-04-25T19:05:11.42981Z', 'endTimeUtc': '2021-04-25T19:06:29.157281Z', 'properties': {'_azureml.ComputeTargetType': 'amlcompute', 'ContentSnapshotId': '9fe5036d-d200-43b8-affe-ebf6a68f7c47', 'Process

In [15]:
# Register the outputs of the best run as a model in the workspace
best_run.register_model(
    model_path='./outputs/',
    model_name="best_run_hyperdrive.pkl",
)

print(best_run)

# Save a local copy of the trained model file produced by the best run
best_run.download_file('outputs/model.pkl', 'best_hyperdrive_model.pkl')

Run(Experiment: capstone-heart-failure-prediction,
Id: HD_723dea94-8708-4a40-8457-0f576dc1e5f1_0,
Type: azureml.scriptrun,
Status: Completed)
