# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, quniform
from azureml.core import Workspace, Experiment, ScriptRunConfig
import os

## Dataset

In [2]:
from azureml.core import Workspace, Experiment

experiment_name = "Hyperdrive"

compute_cluster_name = "worker"
vm_size = 'STANDARD_D2_V2'
max_nodes = 4

project_folder = './'
training_script_name = 'train.py'
outputs_folder_path = "outputs"
model_file_name = "model.pkl"

primary_metric_name = 'MRSE'
hyperdrive_model_name = "hd_classifier"


ws = Workspace.from_config()
exp = Experiment(workspace=ws, name=experiment_name)

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: capstone
Azure region: westeurope
Subscription id: f08c5f25-28be-4c21-993c-ad64d5c84d3a
Resource group: ML


## Hyperdrive Configuration

In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = "worker"

try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

myenv = Environment.get(workspace=ws, name="AzureML-Minimal").clone('capstone-hyperdrive-env')
myenv.docker.enabled = True

myenv.python.conda_dependencies.add_pip_package('skits')


In [5]:
from azureml.train.hyperdrive.parameter_expressions import quniform

ps = RandomParameterSampling({
    '--param_horizon': quniform(1, 36, 1),
    '--param_datapoints_in_past_as_features': quniform(1, 36, 1),
    '--param_test_size':choice(10, 20),
    '--param_n_jobs':quniform(1, 32, 1),
    '--param_n_estimators': quniform(1, 5000, 1)
    })


policy = BanditPolicy(evaluation_interval = 2, slack_factor = 0.1)

est = ScriptRunConfig(
        source_directory=project_folder,
        script=training_script_name,
        arguments=[
                '--param_horizon',5,
                '--param_datapoints_in_past_as_features',5,
                '--param_test_size',10,
                '--param_n_jobs',4,
                '--param_n_estimators',100
        ],
        environment = myenv,
        compute_target = compute_target)

hyperdrive_config = HyperDriveConfig(run_config=est,
                                         hyperparameter_sampling=ps,
                                         policy=policy,
                                         primary_metric_name=primary_metric_name,
                                         primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                                         max_total_runs=4,
                                         max_concurrent_runs=4)

In [6]:
hyperdrive_run = exp.submit(hyperdrive_config)


## Run Details

In [7]:
from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)
assert(hyperdrive_run.get_status() == "Completed")

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_a4d46827-2520-48be-9902-55a089b23fec
Web View: https://ml.azure.com/experiments/Hyperdrive/runs/HD_a4d46827-2520-48be-9902-55a089b23fec?wsid=/subscriptions/f08c5f25-28be-4c21-993c-ad64d5c84d3a/resourcegroups/ML/workspaces/capstone

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-09T21:15:10.753315][API][INFO]Experiment created<END>\n""<START>[2021-02-09T21:15:11.330892][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-02-09T21:15:11.610163][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-02-09T21:15:12.4308164Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_a4d46827-2520-48be-9902-55a089b23fec
Web View: https://ml.azure.com/experiments/Hyperdrive/runs/HD_a4d46827-2520-48be-9902-55a089b23fec?wsid=/subscriptions/f08c5f25-28be-4c21-993c-ad64d5c84d3a/resourcegrou

## Best Model

In [8]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
parameter_values = best_run.get_details()['runDefinition']['arguments']
print(parameter_values)

['--param_horizon', '5', '--param_datapoints_in_past_as_features', '5', '--param_test_size', '10', '--param_n_jobs', '4', '--param_n_estimators', '100', '--param_datapoints_in_past_as_features', '33', '--param_horizon', '28', '--param_n_estimators', '374', '--param_n_jobs', '13', '--param_test_size', '10']


In [10]:
model_parameters = dict(zip(parameter_values[::2], parameter_values[1::2]))
model_parameters['--save'] = True

In [11]:
model_parameters

{'--param_horizon': '28',
 '--param_datapoints_in_past_as_features': '33',
 '--param_test_size': '10',
 '--param_n_jobs': '13',
 '--param_n_estimators': '374',
 '--save': True}

In [None]:
from azureml.train.estimator import Estimator
model_est = ScriptRunConfig(
        source_directory=project_folder,
        script=training_script_name,
        arguments=[
                '--param_horizon',28,
                '--param_datapoints_in_past_as_features',33,
                '--param_test_size',10,
                '--param_n_jobs',13,
                '--param_n_estimators',374,
                '--save', True
        ],
        environment = myenv,
        compute_target = compute_target)      
                                        
model_run = exp.submit(model_est)
model_run.wait_for_completion(show_output=True)  