In [1]:
import ComputeTarget
import WorkSpace

In [None]:
# Load the workspace from a local config.json (download it into the current
# directory first).
# NOTE(review): `get_confit` looks like a typo for `get_config` — confirm the
# actual function name in the local ComputeTarget helper module.
ws = ComputeTarget.get_confit()

# No resource group / Machine Learning workspace yet? Uncomment the lines
# below and fill in your own values to create one instead.

# subscription_id = '<your subscription_id>'
# resource_group = '<your resource group name>'
# workspace_name = '<your workspace name>'
# ws = WorkSpace(subscription_id,resource_group,workspace_name)

# Create the experiment that the runs will be recorded under.
experiment_name = 'exp_demo_2'
exp = ComputeTarget.create_experiment(experiment_name, ws)

# Create the compute target; the cluster settings are collected in one place
# so they are easy to tune.
cluster_settings = {
    'compute_name': 'cpu-cluster-1',
    'vm_size': "STANDARD_D2_V2",
    'compute_min_nodes': 0,
    'compute_max_nodes': 4,
}
compute_target = ComputeTarget.compute_target(ws, **cluster_settings)

In [None]:
from azureml.widgets import RunDetails
from azureml.core.script_run_config import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform,choice
from azureml.widgets import RunDetails
import os

# Hyperparameter search space (example: decision tree). HyperDrive samples a
# value for each key and hands it to train.py as a script argument.
# NOTE(review): uniform() samples continuous floats, while max_depth,
# min_samples_split and min_samples_leaf are normally integers for a decision
# tree — confirm train.py casts them, or switch to choice(range(...)).
parameters = RandomParameterSampling({'--criterion': choice('gini', 'entropy'),
                                      '--max_depth': uniform(3, 15),
                                      '--min_samples_split': uniform(2, 10),
                                      '--min_samples_leaf': uniform(2, 10)})

# Early-termination policy: checked every 2 metric reports, with a 10% slack
# relative to the best-performing run.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Run configuration for the training script. The variable keeps its historical
# name `estimator`, but it is a ScriptRunConfig, so it is passed to
# HyperDriveConfig through `run_config` rather than `estimator`.
estimator = ScriptRunConfig(source_directory='.',
                            script='train.py',
                            compute_target=compute_target)

hperdrive_config = HyperDriveConfig(run_config=estimator,
                                    # primary_metric_name must match the name used in run.log() in train.py
                                    primary_metric_name='Accuracy',
                                    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                    policy=policy,
                                    hyperparameter_sampling=parameters,
                                    max_total_runs=10,
                                    max_concurrent_runs=4)

# Submit the HyperDrive run to the experiment, show live details in the widget,
# and block until the sweep has finished.
run = exp.submit(hperdrive_config)
RunDetails(run).show()
run.wait_for_completion(show_output=True)

In [None]:
import joblib
# Pick the best child run of the sweep, report its metrics and register the
# model file it produced.

# Make sure a local ./outputs directory exists; exist_ok makes the cell safe
# to re-run without a separate existence check.
os.makedirs("./outputs", exist_ok=True)

best_run = run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run logged the primary metric (e.g. every run failed or was
    # cancelled); fail with a clear message instead of an AttributeError below.
    raise RuntimeError("No best run found: no child run reported the primary metric 'Accuracy'.")

best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']
print("Run ID:", best_run.id)
print('Metrics:', best_run_metrics['Accuracy'])
print(best_run_metrics)

# Register the model file stored under the run's outputs/ folder.
model = best_run.register_model(model_name='DT_model', model_path='outputs/model.joblib')

In [None]:
# Clean up resources: delete the compute target created earlier in this
# notebook.
# NOTE(review): the message below is printed as soon as delete() returns;
# whether the deletion has fully completed by then is not visible here — confirm.
compute_target.delete()
print('Compute target deleted')