# Hyperparameter Tuning using HyperDrive



In [1]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.experiment import Experiment

## Dataset



In [2]:
# Connect to the Azure ML workspace. The recorded output below shows that this
# call may trigger an interactive (device-login) authentication flow.
ws = Workspace.from_config()

# All HyperDrive runs submitted from this notebook are grouped under this experiment.
experiment_name = 'Avinash-MLH'
experiment = Experiment(workspace=ws, name=experiment_name)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code AX594F7VC to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster that the training runs are submitted to.
aml_compute_cluster = "Avinash-Test"

try:
    # Reuse the cluster when one with this name already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=aml_compute_cluster)
    print('Found Exisitng cluster, use it ')
except ComputeTargetException:
    # No cluster with that name: provision a new one.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        max_nodes=4,
    )
    # The provisioning configuration has to be handed to create(); without it
    # the VM size and node limit defined above would never be applied.
    compute_target = ComputeTarget.create(ws, aml_compute_cluster, compute_config)

# Block until the cluster (existing or newly created) reports a final
# provisioning state, streaming progress to the cell output.
compute_target.wait_for_completion(show_output=True)

Found Exisitng cluster, use it 
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Registered name of the dataset in the workspace, plus a human-readable description.
key = "house_sales"
description_text = "house sales prediction dataset"

# Fail early with a descriptive error when the dataset is not registered;
# otherwise `dataset` would be unbound and the next statement would raise an
# unrelated NameError.
if key not in ws.datasets.keys():
    raise LookupError(
        f"Dataset '{key}' is not registered in the workspace; "
        "register it before running this notebook."
    )
dataset = ws.datasets[key]

# Materialise the registered dataset as a pandas DataFrame for local inspection.
df = dataset.to_pandas_dataframe()

## Hyperdrive Configuration


In [5]:
# Early termination policy (Bandit). An early termination policy is not
# required when Bayesian sampling is used; this notebook samples randomly.
early_termination_policy = BanditPolicy(
    slack_factor=0.2,
    evaluation_interval=1,
    delay_evaluation=5,
)

# Discrete search space: each key is passed as a command-line argument to
# the training script, with a value picked from the listed choices.
search_space = {
    "--n_estimators": choice(100, 120, 140),
    "--max_depth": choice(3, 5, 8),
    "--min_samples_split": choice(2, 4, 8),
}
param_sampling = RandomParameterSampling(search_space)

# Make sure a local ./training folder exists in the current working directory.
training_dir_missing = "training" not in os.listdir()
if training_dir_missing:
    os.mkdir("./training")

# Estimator that runs train.py from the notebook's directory on the cluster
# identified by name.
estimator = SKLearn(
    source_directory='./',
    compute_target=aml_compute_cluster,
    entry_script='train.py',
)

# HyperDrive sweep settings: maximise the 'accuracy' metric logged by the
# training script, with at most 8 runs in total and 4 running concurrently.
hyperdrive_settings = dict(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=8,
    max_concurrent_runs=4,
)
hyperdrive_run_config = HyperDriveConfig(**hyperdrive_settings)

## Run Details


In [6]:
# Launch the HyperDrive sweep under the experiment created earlier.
run = experiment.submit(hyperdrive_run_config, show_output=True)

# Show the notebook widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

# Query the current status; the value is not displayed because this is not
# the last expression of the cell.
run.get_status()

# Wait for the sweep to finish. As the cell's last expression, the returned
# run details are rendered as the cell output.
run.wait_for_completion()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_fb9b0d64-b8b4-4f54-ba6a-3e54a5589972',
 'target': 'Avinash-Test',
 'status': 'Completed',
 'startTimeUtc': '2020-12-07T05:40:05.977651Z',
 'endTimeUtc': '2020-12-07T05:55:35.107009Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '5dd2fb36-6b39-4825-abf8-2a22fd5e88f4',
  'score': '0.8247214498035966',
  'best_child_run_id': 'HD_fb9b0d64-b8b4-4f54-ba6a-3e54a5589972_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg129587.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_fb9b0d64-b8b4-4f54-ba6a-3e54a5589972/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=MNVE%2BHmMMQoXeNLGl7gKmGkqm0Covv%2BgAICBk9ueoFc%3D&st=2020-12-07T05%3A45%3A46Z&se=2020-12-07T13%3A55%3A46Z&sp=r'}}

## Best Model


In [7]:
# Pick the child run that achieved the best value of the primary metric.
best_run = run.get_best_run_by_primary_metric()

# The lookup yields None when no child run has logged the primary metric;
# surface that explicitly instead of failing with an AttributeError below.
if best_run is None:
    raise RuntimeError(
        "No child run reported the primary metric; cannot select a best run."
    )

# Metrics logged by the best child run; shown as the cell output.
best_run_metrics = best_run.get_metrics()
best_run_metrics

{'Number of Estimators': 140,
 'maximum depth': 8,
 'minimum samples split': 2,
 'accuracy': 0.8247214498035966,
 'MAE': 87945.09567813709}

In [8]:

# Summarise the winning run: its identifier and the MAE metric it logged.
best_run_id = best_run.id
best_run_mae = best_run_metrics['MAE']

print('Best Run ID', best_run_id)
print('\n MAE', best_run_mae)



Best Run ID HD_fb9b0d64-b8b4-4f54-ba6a-3e54a5589972_3

 MAE 87945.09567813709


In [12]:

# Register the best run's ./outputs path as a model named 'Bestmodel'.
# The resulting Model object is the cell's last expression, so it is displayed.
registered_model = best_run.register_model(
    model_name='Bestmodel',
    model_path='./outputs',
)
registered_model

Model(workspace=Workspace.create(name='quick-starts-ws-129587', subscription_id='30d182b7-c8c4-421c-8fa0-d3037ecfe6d2', resource_group='aml-quickstarts-129587'), name=Bestmodel, id=Bestmodel:1, version=1, tags={}, properties={})