# Hyperparameter Tuning using HyperDrive



In [1]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.experiment import Experiment

## Dataset



In [2]:
# Connect to the Azure ML workspace described by the local configuration and
# open the experiment under which every run in this notebook is recorded.
ws = Workspace.from_config()
experiment_name = 'Avinash-MLH'
experiment = Experiment(workspace=ws, name=experiment_name)

In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

aml_compute_cluster = "Avinash-Test"

# Reuse the cluster if one with this name already exists in the workspace;
# constructing ComputeTarget raises ComputeTargetException when it does not.
try:
    compute_target = ComputeTarget(workspace=ws, name=aml_compute_cluster)
    print('Found Exisitng cluster, use it ')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        max_nodes=4,
    )
    # The provisioning configuration has to be handed to create(); without it
    # the call has nothing describing the cluster to build and fails.
    compute_target = ComputeTarget.create(ws, aml_compute_cluster, compute_config)

# Block until the cluster (existing or newly requested) reports a final state.
compute_target.wait_for_completion(show_output=True)

Found Exisitng cluster, use it 
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
key = "house_sales"
description_text = "house sales prediction dataset"

# Fail with an explicit message when the dataset has not been registered,
# instead of hitting a NameError on the undefined `dataset` further down.
if key not in ws.datasets.keys():
    raise KeyError(
        f"Dataset '{key}' is not registered in the workspace; "
        "register it before running this notebook."
    )

dataset = ws.datasets[key]

# Materialise the registered dataset as a pandas DataFrame.
df = dataset.to_pandas_dataframe()

## Hyperdrive Configuration


In [5]:
# Early termination policy applied to the child runs of the sweep.
early_termination_policy = BanditPolicy(
    slack_factor=0.2,
    evaluation_interval=1,
    delay_evaluation=5,
)

# Random sampling over three discrete hyperparameters.
# NOTE(review): the "--" prefixed keys are presumably the argument names
# parsed by train.py - confirm against the training script.
param_sampling = RandomParameterSampling(
    {
        "--n_estimators": choice(100, 120, 140),
        "--max_depth": choice(3, 5, 8),
        "--min_samples_split": choice(2, 4, 8),
    }
)

# Create a "training" folder in the working directory when none is listed.
if "training" not in os.listdir("."):
    os.mkdir("./training")

# Estimator that runs train.py from the current directory on the named cluster.
estimator = SKLearn(
    source_directory='./',
    entry_script='train.py',
    compute_target=aml_compute_cluster,
)

# Sweep definition: maximise the logged 'accuracy' metric over at most
# 8 child runs, with up to 4 running at the same time.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=8,
    max_concurrent_runs=4,
)

## Run Details


In [6]:
# Submit the HyperDrive configuration to the experiment and keep the run handle.
run = experiment.submit(
    hyperdrive_run_config,
    show_output=True,
)




In [9]:
# Render the monitoring widget for the sweep.
details_widget = RunDetails(run)
details_widget.show()

# Status is queried here but its return value is not used or displayed.
run.get_status()

# Block until the sweep finishes; the returned details are the cell output.
run.wait_for_completion()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_20e82ed4-9d58-411a-9202-454835aa3e4d',
 'target': 'Avinash-Test',
 'status': 'Completed',
 'startTimeUtc': '2020-12-06T19:06:07.362111Z',
 'endTimeUtc': '2020-12-06T19:31:29.885767Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '8a5b9ad4-6582-4ed7-bf91-f12e5ad239dc',
  'score': '0.8256059939576684',
  'best_child_run_id': 'HD_20e82ed4-9d58-411a-9202-454835aa3e4d_6',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg129522.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_20e82ed4-9d58-411a-9202-454835aa3e4d/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=RqDLFtGFkmQO6iT0wa0cbbEzODY%2BhQvfzQZSqgeuoNA%3D&st=2020-12-06T19%3A21%3A33Z&se=2020-12-07T03%3A31%3A33Z&sp=r'}}

## Best Model


In [10]:
# Select the child run with the best value of the primary metric.
best_run = run.get_best_run_by_primary_metric()

# No best run is returned when no child run logged the primary metric;
# stop here with a clear message rather than failing on a None attribute access.
if best_run is None:
    raise RuntimeError(
        "No child run reported the primary metric; cannot determine the best run."
    )

# Metrics logged by the best child run, displayed as the cell output.
best_run_metrics = best_run.get_metrics()
best_run_metrics

{'Number of Estimators': 120,
 'maximum depth': 8,
 'minimum samples split': 8,
 'accuracy': 0.8256059939576684,
 'MAE': 87983.16749381767}

In [11]:

# Report the identifier of the best child run, then its logged MAE.
print('Best Run ID', best_run.id)
best_mae = best_run_metrics['MAE']
print('\n MAE', best_mae)



Best Run ID HD_20e82ed4-9d58-411a-9202-454835aa3e4d_6

 MAE 87983.16749381767
