# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.sklearn import SKLearn

import os

## Cancel existing Runs

In [2]:
# Connect to the workspace described by the local Azure ML config file
# and print its identifying details, one per line.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

experiment_name = 'capstone-heart-failure-prediction'
experiment=Experiment(ws, experiment_name)

# Statuses of runs that have not reached a terminal state yet. Cancelling only
# "Running" runs would leave queued/preparing runs alive, and those would still
# start later and occupy the compute cluster.
ACTIVE_RUN_STATUSES = {"NotStarted", "Starting", "Provisioning", "Preparing", "Queued", "Running"}

# List every run of the experiment and cancel the ones that are still active.
for run in experiment.get_runs():
    print(run.id)
    if run.status in ACTIVE_RUN_STATUSES:
        run.cancel()

ws_udacity_capstone
UdacityMLAzureCapstone
eastus2
ca1598e0-85dc-47d5-b06d-41b5342b4989
HD_8c336eb8-1d4c-44da-bff6-7e7dbe27ce10
HD_b66b81d7-19d8-4015-b0eb-874daf467767
HD_ce80c1a8-3d23-49b1-b29d-699dab0b8192


## Create New Cluster / Use Existing Cluster

In [4]:
# Name of the compute cluster this notebook trains on
cpu_cluster_name = "MLAzureCapstoneCompute"

try:
    # Look the cluster up by name in the workspace; a ComputeTargetException
    # here is treated as "the cluster does not exist yet".
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster scaling between 1 and 4 nodes
    print('Creating a new compute cluster...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2',
        min_nodes=1,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait for provisioning to finish. No minimum node count or timeout is passed,
# so the cluster's own scale settings apply.
compute_target.wait_for_completion(show_output=True)

# Print the detailed current status of the cluster
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing cluster, use it.

Running
{'errors': [], 'creationTime': '2021-04-22T16:36:06.518630+00:00', 'createdBy': {'userObjectId': '2f5770ca-7bf7-4ecc-bd4b-14652b1bbf0c', 'userTenantId': '3814e260-63cb-43a8-82ce-e862c309e004', 'userName': 'Abhi Ojha'}, 'modifiedTime': '2021-04-22T16:38:24.852462+00:00', 'state': 'Running', 'vmSize': 'STANDARD_DS3_V2'}
Uploading an estimated of 1 files
Target already exists. Skipping upload for data/heart_failure_clinical_records_dataset.csv
Uploaded 0 files


## Dataset

In [None]:
# Location of the CSV. The same relative path is used locally and, after the
# upload below, inside the datastore.
local_data_path = 'data/heart_failure_clinical_records_dataset.csv'

# Default datastore of the workspace, used as the upload destination
datastore = ws.get_default_datastore()

# Copy the contents of the local 'data' directory to 'data' in the datastore
datastore.upload(src_dir='data', target_path='data')

# Build a tabular dataset that points at the uploaded CSV in the datastore
csv_location = (datastore, local_data_path)
tabular_ds = Dataset.Tabular.from_delimited_files(path=[csv_location])

# Register it in the workspace under a fixed name, creating a new version
heart_failure_ds = tabular_ds.register(
    workspace=ws,
    name='heart_failure_ds',
    create_new_version=True,
)

## Hyperdrive Configuration

I am using sklearn's Logistic Regression model, as it is a good starting model for classification tasks. Since this is a binary classification problem (a single target variable) with a moderate number of input features, I expect a well-tuned Logistic Regression to do a decent job.

**Parameter Sampler**

The hyperparameter sampler is defined below:

```python
ps = RandomParameterSampling(
    {
        '--C' : choice(0.01,0.1,1,10,100),
        '--max_iter': choice(50,100)
    }
)
```

Here, `C` is the inverse of the regularization strength (smaller values mean stronger regularization), and `max_iter` is the maximum number of iterations the solver may run. The sampling options available in the Azure ML library include `RandomParameterSampling`, `GridParameterSampling`, and `BayesianParameterSampling`. Of these, I used `RandomParameterSampling` because it is fast and supports early termination of low-performing runs.

**Early Stopping Policy**

I used the BanditPolicy for early stopping, as below:

```python
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)
```

Here, `evaluation_interval` is an optional argument that sets how often the policy is applied. The `slack_factor` argument defines the amount of slack allowed relative to the best-performing training run.

In [5]:
# Early termination policy: a BanditPolicy applied every 2 evaluation intervals
# with a slack factor of 0.1 relative to the best run so far.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Hyperparameter search space, sampled at random. The keys are the argument
# names handed to the entry script; each value is a discrete set of choices.
ps = RandomParameterSampling(
    {
        '--C' : choice(0.01,0.1,1,10,100),
        '--max_iter': choice(50,100)
    }
)

# Make sure the ./training directory exists. makedirs(exist_ok=True) replaces
# the listdir-then-mkdir check: it is a single call with no window between the
# check and the creation, and it fails loudly if "training" is a regular file.
os.makedirs("./training", exist_ok=True)

# SKLearn estimator that runs train.py from the current directory on the cluster
est = SKLearn(source_directory = "./",
            compute_target=compute_target,
            vm_size='STANDARD_DS3_V2',
            entry_script="train.py")

# HyperDrive sweep: maximize the 'Accuracy' metric over at most 16 runs in
# total, with up to 4 running concurrently.
hyperdrive_run_config = HyperDriveConfig(hyperparameter_sampling=ps, 
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=early_termination_policy,
                                     estimator=est,
                                     max_total_runs=16,
                                     max_concurrent_runs=4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


## Run Details

In [6]:
# Submit the HyperDrive configuration as a new run of the experiment
hyperdrive_run = experiment.submit(hyperdrive_run_config)

# Display the monitoring widget for the sweep
details_widget = RunDetails(hyperdrive_run)
details_widget.show()

# Wait for the sweep to finish, streaming its output into the notebook
hyperdrive_run.wait_for_completion(show_output=True)

# Stop here unless the sweep ended in the "Completed" state
assert hyperdrive_run.get_status() == "Completed"



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_895d79fe-7a78-41ba-9364-334bb9d736c2
Web View: https://ml.azure.com/runs/HD_895d79fe-7a78-41ba-9364-334bb9d736c2?wsid=/subscriptions/ca1598e0-85dc-47d5-b06d-41b5342b4989/resourcegroups/UdacityMLAzureCapstone/workspaces/ws_udacity_capstone&tid=3814e260-63cb-43a8-82ce-e862c309e004

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-24T16:06:54.674968][API][INFO]Experiment created<END>\n""<START>[2021-04-24T16:06:56.330023][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-04-24T16:06:56.508971][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_895d79fe-7a78-41ba-9364-334bb9d736c2
Web View: https://ml.azure.com/runs/HD_895d79fe-7a78-41ba-9364-334bb9d736c2?wsid=/subscriptions/ca1598e0-85dc-47d5-b06d-41b5342b4989/resourcegroups/UdacityMLAzureCapstone/workspaces/ws_udacity_capstone&tid=3814e260-63cb-43a8-82ce-e862c309e004

{
  "error": {
    "code":

## Best Model

In [8]:
# NOTE(review): joblib is not used in this cell; kept in case later cells rely on it.
import joblib

# Pick the child run with the best primary metric. The lookup yields None when
# no child run logged the metric, so fail with a clear message instead of an
# AttributeError on the lines below.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    raise RuntimeError(
        "No HyperDrive child run logged the primary metric; "
        "check the child run logs before selecting a best model."
    )

# Show the metrics and the stored files of the best run
print("Best run metrics :",best_run.get_metrics())
print("Best run file names :",best_run.get_file_names())

# Save the best model: download the run's 'outputs/model.pkl' to a local file
best_run.download_file('outputs/model.pkl', 'best_hyperdrive_model.pkl')

Best run metrics : {'Regularization Strength:': 100.0, 'Max iterations:': 50, 'Accuracy': 0.8333333333333334}
Best run file names : ['azureml-logs/55_azureml-execution-tvmps_20e2d350932a8de361129996ec5587e285ce75d138b7e8105d5701a9a089c092_d.txt', 'azureml-logs/65_job_prep-tvmps_20e2d350932a8de361129996ec5587e285ce75d138b7e8105d5701a9a089c092_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_20e2d350932a8de361129996ec5587e285ce75d138b7e8105d5701a9a089c092_d.txt', 'logs/azureml/103_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.pkl']
