In [1]:
# connect to workspace
import azureml.core
from azureml.core import Workspace

ws = Workspace.from_config()
print('Ready to use Azureml {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azureml 1.13.0 to work with seed


In [2]:
# Creating folder for the script
import os

experiment_folder = 'house-price-training-AML'
os.makedirs(experiment_folder, exist_ok=True)
print('folder ready')

folder ready


In [3]:
# Preparing compute Target
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = 'cluster-ml'

try:
    # Check for existing compute target
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)

Found existing cluster, use it.


In [20]:
# Hyperdrive Experiment
from azureml.core import Experiment
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive import GridParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, choice, normal
from azureml.widgets import RunDetails
import numpy as np

# sample a range of parameter values
params =GridParameterSampling(
{
   '--n_estimators': choice(50, 100, 150, 200, 250, 300, 350, 400, 450, 500) ,
   '--max_depth': choice(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)
})

# Get the training dataset
house_price_ds = ws.datasets.get('house-price')

# Create an estimator
hyper_estimator = SKLearn(source_directory = experiment_folder,
                         inputs = [house_price_ds.as_named_input('house_price')],
                         pip_packages = ['azureml-sdk'],
                         entry_script = 'house-price-training-Forest.ipynb',
                         compute_target = cluster_name)
# Stopping policy
early_termination_policy = BanditPolicy(slack_amount = 0.2,
                                       evaluation_interval = 1,
                                       delay_evaluation = 5)
# Configure Hyperdrive settings
hyperdrive = HyperDriveConfig(estimator=hyper_estimator,
                             hyperparameter_sampling=params,
                             policy = early_termination_policy,
                             primary_metric_name = "Root Mean square error",
                             primary_metric_goal = PrimaryMetricGoal.MINIMIZE,
                             max_total_runs = 6,
                             max_concurrent_runs = 4)

# Run the experiment
experiment = Experiment(workspace = ws, name = 'house_price_training_hyperdrive')
run = experiment.submit(config=hyperdrive)

# Show the status in the notebook as the experiment runs
RunDetails(run).show()
run.wait_for_completion()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_c150b4ab-958a-4565-b0d4-0a587ad6e0ac',
 'target': 'cluster-ml',
 'status': 'Completed',
 'startTimeUtc': '2020-09-08T07:55:30.7381Z',
 'endTimeUtc': '2020-09-08T08:01:46.785988Z',
 'properties': {'primary_metric_config': '{"name": "Root Mean square error", "goal": "minimize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '2fa53a70-aead-408a-b80e-0b72d33d4eeb',
  'score': '27177.315460774666',
  'best_child_run_id': 'HD_c150b4ab-958a-4565-b0d4-0a587ad6e0ac_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://seed0661444697.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_c150b4ab-958a-4565-b0d4-0a587ad6e0ac/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=vgPBA0INZHqZy2OaNFi7ed8lhAzFNM4Zocmn5o06zIg%3D&st=2020-09-08T07%3A52%3A02Z&se=2020-09-08T16%3A02%3A02Z&sp=r'}}

In [5]:
# Determine the best run
for child_run in run.get_children_sorted_by_primary_metric():
    print(child_run)

{'run_id': 'HD_3420f029-5dd9-4c3c-8f1b-95f629b79cc5_5', 'hyperparameters': '{"--max_depth": 60, "--n_estimators": 400}', 'best_primary_metric': 27177.315460774666, 'status': 'Completed'}
{'run_id': 'HD_3420f029-5dd9-4c3c-8f1b-95f629b79cc5_4', 'hyperparameters': '{"--max_depth": 20, "--n_estimators": 200}', 'best_primary_metric': 27177.315460774666, 'status': 'Completed'}
{'run_id': 'HD_3420f029-5dd9-4c3c-8f1b-95f629b79cc5_3', 'hyperparameters': '{"--max_depth": 100, "--n_estimators": 300}', 'best_primary_metric': 27177.315460774666, 'status': 'Completed'}
{'run_id': 'HD_3420f029-5dd9-4c3c-8f1b-95f629b79cc5_2', 'hyperparameters': '{"--max_depth": 70, "--n_estimators": 50}', 'best_primary_metric': 27177.315460774666, 'status': 'Completed'}
{'run_id': 'HD_3420f029-5dd9-4c3c-8f1b-95f629b79cc5_0', 'hyperparameters': '{"--max_depth": 80, "--n_estimators": 500}', 'best_primary_metric': 27177.315460774666, 'status': 'Completed'}
{'run_id': 'HD_3420f029-5dd9-4c3c-8f1b-95f629b79cc5_1', 'hyperpar

In [21]:
best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print(' -rmse:', best_run_metrics['Root Mean square error'])
print(' -mse:', best_run_metrics['Mean square error'])
print(' -model parameters:',parameter_values[9])

Best Run Id:  HD_c150b4ab-958a-4565-b0d4-0a587ad6e0ac_1
 -rmse: 27177.315460774666
 -mse: 738606475.6544616
 -model parameters: {"n_estimators":100,"max_depth":10}
