In [1]:
import os
import urllib
import shutil
import azureml

from azureml.core import Experiment
from azureml.core import Workspace, Run
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive import GridParameterSampling
from azureml.train.hyperdrive import HyperDriveConfig
from azureml.train.hyperdrive import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice

from azureml.widgets import RunDetails

In [2]:
# Load the workspace object from the configuration file at ./config/ws_config.json.
ws = Workspace.from_config(
    path="./config/ws_config.json",
)

In [3]:
# Experiment in workspace `ws` that the HyperDrive run is submitted to.
exp = Experiment(
    name="house_prices_prediction",
    workspace=ws,
)

In [4]:
# Default datastore of the workspace; the data upload and the estimator's
# "--data-folder" mount in the following cells both go through this object.
ds = ws.get_default_datastore()

In [5]:
# Upload the local ./data folder to the "data" path on the datastore.
# overwrite=True replaces files already present at the target path.
# Kept as the last expression so the returned reference is shown as cell output.
ds.upload(
    src_dir="./data",
    target_path="data",
    overwrite=True,
    show_progress=True,
)

Uploading an estimated of 1 files
Uploading ./data/ames.csv
Uploaded ./data/ames.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_3406cd66d37b416695aa9de45d9a9b87

In [6]:
cluster_name = "compute01"

try:
    # Look the cluster up by name; a ComputeTargetException is treated as
    # "cluster does not exist yet" and triggers provisioning below.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print("Creating a new compute target...")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_DS12_V2",
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    # Block until provisioning finishes (or the 20-minute timeout elapses).
    compute_target.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
else:
    print("Found existing compute target")

Found existing compute target


In [7]:
# Folder holding the training code; train_model.py inside it is the entry script.
script_folder = "./scripts"

# Arguments passed to the entry script: the datastore is mounted and handed over
# as the value of --data-folder.
script_params = {"--data-folder": ds.as_mount()}

estimator = SKLearn(
    source_directory=script_folder,
    entry_script="train_model.py",
    script_params=script_params,
    compute_target=compute_target,
)

In [8]:
# Discrete search space for the grid: 3 * 2 * 2 * 2 = 24 combinations.
search_space = {
    "n-estimators": choice(500, 750, 1000),
    "max-depth": choice(4, 6),
    "min-samples-split": choice(2, 4),
    "learning-rate": choice(0.01, 0.001),
}

param_sampling = GridParameterSampling(search_space)

In [9]:
# HyperDrive sweep over `param_sampling` using `estimator`, minimising the
# "test_MAE" metric. No early-termination policy is set (policy=None).
# NOTE(review): max_concurrent_runs=24 exceeds the cluster's max_nodes=6 set when
# the compute target is created — confirm this is intended.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=None,
    primary_metric_name="test_MAE",
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=100,
    max_concurrent_runs=24,
)

In [10]:
# Submit the HyperDrive configuration to the experiment; the returned run object
# is used by the following cells to show progress and collect child-run results.
hyperdrive_run = exp.submit(
    config=hyperdrive_run_config,
)

In [11]:
# Show the RunDetails Jupyter widget for the HyperDrive run.
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()

A Jupyter Widget

In [26]:
import pandas as pd

# Hyperparameters swept by the grid, mapped to the position their value held in a
# child run's argument list in the earlier positional lookup. The position is only
# a fallback for when the "--<name>" flag cannot be found in the arguments.
HYPERPARAMETER_POSITIONS = {
    "n-estimators": 3,
    "max-depth": 5,
    "min-samples-split": 7,
    "learning-rate": 9,
}


def _extract_hyperparameters(arguments):
    """Return the swept hyperparameter values found in a run's argument list.

    Each value is first looked up as the element following its ``--<name>`` flag,
    so the result does not depend on the order of the arguments. If the flag is
    not present, the value at the fixed fallback position is used instead. When
    neither lookup succeeds the value is recorded as "N/A".

    Parameters
    ----------
    arguments : sequence or None
        The "arguments" entry of a run's "runDefinition" details.

    Returns
    -------
    dict
        One entry per name in HYPERPARAMETER_POSITIONS.
    """
    arguments = list(arguments or [])
    values = {}
    for name, position in HYPERPARAMETER_POSITIONS.items():
        flag = "--" + name
        # Search all but the last element so a value is guaranteed to follow the flag.
        if flag in arguments[:-1]:
            values[name] = arguments[arguments.index(flag) + 1]
        elif position < len(arguments):
            values[name] = arguments[position]
        else:
            values[name] = "N/A"
    return values


children = list(hyperdrive_run.get_children())

# One record per child run, keyed by its position in `children`.
metricslist = {}
for run_index, single_run in enumerate(children):
    # Keep only the metrics whose logged value is a float.
    results = {k: v for k, v in single_run.get_metrics().items() if isinstance(v, float)}

    # Tolerate runs whose details carry no run definition or no arguments.
    run_definition = single_run.get_details().get("runDefinition") or {}
    results.update(_extract_hyperparameters(run_definition.get("arguments")))

    metricslist[run_index] = results

# Columns are run indices at this point; sort them, then transpose to one row per run.
# `axis` is passed by keyword because newer pandas releases reject it positionally.
rundata = pd.DataFrame(metricslist).sort_index(axis=1).T

# Rank by the primary metric when at least one run has logged it; otherwise show
# the table unsorted instead of failing with a KeyError.
if "test_MAE" in rundata.columns:
    rundata = rundata.sort_values(by=["test_MAE"], ascending=True)

display(rundata)

Unnamed: 0,train_MAE,train_R2,val_MAE,val_R2E,test_MAE,test_R2,n-estimators,max-depth,min-samples-split,learning-rate
14,7658.2,0.984881,14941.9,0.902769,14357.4,0.90553,1000.0,4.0,4.0,0.01
20,7631.63,0.985101,14969.4,0.902816,14426.7,0.903772,1000.0,4.0,2.0,0.01
18,8387.18,0.98162,15093.4,0.90167,14483.9,0.904528,750.0,4.0,4.0,0.01
11,8366.69,0.981793,15102.6,0.902171,14519.4,0.90282,750.0,4.0,2.0,0.01
3,9668.2,0.974861,15483.7,0.8983,14807.7,0.902815,500.0,4.0,2.0,0.01
7,9683.5,0.974706,15485.5,0.898174,14835.4,0.902834,500.0,4.0,4.0,0.01
1,3330.47,0.99728,15526.4,0.891913,15211.8,0.883335,1000.0,6.0,2.0,0.01
17,3342.1,0.997246,15394.2,0.893883,15257.5,0.882449,1000.0,6.0,4.0,0.01
13,3946.54,0.99623,15432.7,0.893581,15278.3,0.88244,750.0,6.0,4.0,0.01
10,3910.88,0.996314,15532.1,0.891308,15296.8,0.881439,750.0,6.0,2.0,0.01
