# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import os

import joblib
import numpy as np
import pandas as pd
import pkg_resources
from matplotlib import pyplot as plt
from sklearn import datasets

from azureml.core import Dataset
from azureml.core import Workspace, Experiment
from azureml.core import Workspace, ScriptRunConfig, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails


## Dataset
TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
# Connect to the workspace described by the local config file.
ws = Workspace.from_config()

# All HyperDrive runs in this notebook are grouped under this experiment.
experiment_name = 'HyperDrive_Heart-Failure-experiment'
experiment = Experiment(workspace=ws, name=experiment_name)

display(experiment)


Name,Workspace,Report Page,Docs Page
HyperDrive_Heart-Failure-experiment,quick-starts-ws-243057,Link to Azure Machine Learning studio,Link to Documentation


In [3]:
amlcompute_cluster_name = "aml-compute1"

# Look the cluster up by name first; provision a new one only when the lookup fails.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2', min_nodes=1, max_nodes=4
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster (existing or newly created) reports completion.
# For a more detailed view of current AmlCompute status, use get_status().
compute_target.wait_for_completion(show_output=True)


Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
key = "Heart-Failure"
description_text = "Heart Failure data set"

# Fetch the workspace's registered datasets once and reuse the one stored
# under `key` when it exists, so the external CSV is only registered on the
# first run of this cell.
registered_datasets = ws.datasets
found = key in registered_datasets

if found:
    dataset = registered_datasets[key]
else:
    # Create an AML tabular Dataset from the external CSV ...
    example_data = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)
    # ... and register it in the Workspace under `key`.
    dataset = dataset.register(workspace=ws,
                               name=key,
                               description=description_text)

# Materialise the dataset as a pandas DataFrame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()


Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


## Hyperdrive Configuration

TODO: Explain the model you are using and the reasons for choosing the different hyperparameters, termination policy and config settings.

**Termination Policy (BanditPolicy):**

    * The chosen early termination policy is BanditPolicy.
    * The policy is applied every 2 evaluation intervals (one interval per logged value of the primary metric) and cancels any run whose metric falls outside the slack factor of 0.1 relative to the best-performing run.

**Hyperparameter Sampling (RandomParameterSampling):**

    * RandomParameterSampling is used for hyperparameter tuning.
    * The script randomly samples hyperparameter values for two parameters: --C and --max_iter.
    * Values for --C include 0.001, 0.01, 0.1, 1, 10, 20, 50, and 100.
    * Values for --max_iter include 50 and 100.

**Environment:**

    * The code references an existing environment named "AzureML-Tutorial."
    * Environments define the software dependencies for the training job.

**ScriptRunConfig:**

    * ScriptRunConfig specifies the configuration details for the training job.
    * It defines the source directory, the training script command, the compute target, and the environment settings.

**HyperDriveConfig:**

    * HyperDriveConfig combines all elements.
    * It sets the run configuration, hyperparameter sampling, and termination policy.
    * The primary metric for optimization is 'Accuracy.'
    * The goal is to 'MAXIMIZE' the primary metric (maximizing accuracy).
    * A maximum of 10 runs can be executed.
    * Up to 4 runs can be executed concurrently.

In [13]:
# Early termination policy (not required when using Bayesian sampling):
# Bandit policy with a slack factor of 0.1, applied every 2 intervals.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Hyperparameters to sample at random during training.
search_space = {
    '--C': choice(0.001, 0.01, 0.1, 1, 10, 20, 50, 100),
    '--max_iter': choice(50, 100),
}
ps = RandomParameterSampling(search_space)

# Existing environment that the training job runs in.
env = Environment.get(ws, name="AzureML-Tutorial")

# Configuration details of the training job: source folder, command,
# compute target (referenced by name) and environment.
src = ScriptRunConfig(
    source_directory='.',
    command=['python', 'train.py'],
    environment=env,
    compute_target=amlcompute_cluster_name,
)

# Combine the run configuration, the sampler and the termination policy.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=10,
    max_concurrent_runs=4,
)

## Run Details
TODO: In the cell below, submit the HyperDrive experiment and use the `RunDetails` widget to monitor the progress of the different runs.

In [22]:
# Submit the HyperDrive configuration to the experiment.
hyperdrive_run = experiment.submit(config=hyperdrive_config)

# Show the Jupyter widget that tracks the progress of the runs.
run_widget = RunDetails(hyperdrive_run)
run_widget.show()

# Stream the logs and block until the HyperDrive run has finished.
hyperdrive_run.wait_for_completion(show_output=True)


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_e13ff042-def6-4dc5-8262-b9d950d77c3e
Web View: https://ml.azure.com/runs/HD_e13ff042-def6-4dc5-8262-b9d950d77c3e?wsid=/subscriptions/61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30/resourcegroups/aml-quickstarts-243057/workspaces/quick-starts-ws-243057&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

[2023-10-21T07:33:58.224658][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space
[2023-10-21T07:34:09.2970941Z][SCHEDULER][INFO]Scheduling job, id='HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_1' 
[2023-10-21T07:34:09.3606069Z][SCHEDULER][INFO]Scheduling job, id='HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_0' 
[2023-10-21T07:34:09.4267873Z][SCHEDULER][INFO]Scheduling job, id='HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_2' 
[2023-10-21T07:34:09.5120009Z][SCHEDULER][INFO]Scheduling job, id='HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_3' 
[2023-10-21T07:34:09.461699][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution t

{'runId': 'HD_e13ff042-def6-4dc5-8262-b9d950d77c3e',
 'target': 'aml-compute1',
 'status': 'Completed',
 'startTimeUtc': '2023-10-21T07:33:47.656368Z',
 'endTimeUtc': '2023-10-21T07:38:01.206084Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"Accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'b3ed0aae-66ee-4693-bc25-65e631d70b68',
  'user_agent': 'python/3.8.5 (Linux-5.15.0-1040-azure-x86_64-with-glibc2.10) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.51.0',
  'space_size': '16',
  'score': '0.7676767676767676',
  'best_child_run_id': 'HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_0',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_0'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClient

## Best Model
TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [30]:
# Pick the child run that scored best on the primary metric and print
# its details, file names and metrics, in that order.
best_hyperdrive_run = hyperdrive_run.get_best_run_by_primary_metric()

for label, fetch in (
    ("details", best_hyperdrive_run.get_details),
    ("file names", best_hyperdrive_run.get_file_names),
    ("metrics", best_hyperdrive_run.get_metrics),
):
    print(f"best run {label} :", fetch())


best run details : {'runId': 'HD_e13ff042-def6-4dc5-8262-b9d950d77c3e_0', 'target': 'aml-compute1', 'status': 'Completed', 'startTimeUtc': '2023-10-21T07:34:25.407052Z', 'endTimeUtc': '2023-10-21T07:34:45.86781Z', 'services': {}, 'properties': {'_azureml.ComputeTargetType': 'amlctrain', 'ContentSnapshotId': 'b3ed0aae-66ee-4693-bc25-65e631d70b68', 'ProcessInfoFile': 'azureml-logs/process_info.json', 'ProcessStatusFile': 'azureml-logs/process_status.json'}, 'inputDatasets': [], 'outputDatasets': [], 'runDefinition': {'script': None, 'command': 'python train.py', 'useAbsolutePath': False, 'arguments': [], 'sourceDirectoryDataStore': None, 'framework': 'Python', 'communicator': 'None', 'target': 'aml-compute1', 'dataReferences': {}, 'data': {}, 'outputData': {}, 'datacaches': [], 'jobName': None, 'maxRunDurationSeconds': 2592000, 'nodeCount': 1, 'instanceTypes': [], 'priority': None, 'credentialPassthrough': False, 'identity': None, 'environment': {'name': 'AzureML-Tutorial', 'version': '8

In [31]:
# Save the best model.
# List every file recorded by the best run.
run_file_names = best_hyperdrive_run.get_file_names()

# Print the list of files in the run
print("Files in the best run:", run_file_names)

# The run also contains log files, so the first entry of the listing is not
# the model. Select the pickled model stored under outputs/ explicitly
# instead of relying on the order of the listing.
model_candidates = [
    name for name in run_file_names
    if name.startswith('outputs/') and name.endswith('.pkl')
]
if not model_candidates:
    raise FileNotFoundError(
        "No .pkl model file found under outputs/ in the best run"
    )
model_file_path = model_candidates[0]

# Download the model file to a local path
best_hyperdrive_run.download_file(
    model_file_path, output_file_path='hd_best_model.pkl')

Files in the best run: ['logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/rslex.log', 'outputs/hyper-model.pkl', 'system_logs/cs_capability/cs-capability.log', 'system_logs/hosttools_capability/hosttools-capability.log', 'system_logs/lifecycler/execution-wrapper.log', 'system_logs/lifecycler/lifecycler.log', 'system_logs/metrics_capability/metrics-capability.log', 'system_logs/snapshot_capability/snapshot-capability.log', 'user_logs/std_log.txt']
