# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice

## Dataset

In [2]:
ws = Workspace.from_config()
experiment_name = 'Capstone-cancer-experiment-hyperdrive'

experiment=Experiment(ws, experiment_name)

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_cluster_name = 'cluster-capstone'

try:
    compute_target = ComputeTarget(workspace=ws, name = compute_cluster_name)
    print('Found the compute cluster')

except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D3_V2", max_nodes=4)
    compute_target = ComputeTarget.create(ws, compute_cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)

Found the compute cluster


In [4]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.9
- scikit-learn==0.24.1
- pip:
  - xgboost==0.90
  - azureml-defaults

Overwriting conda_dependencies.yml


In [5]:
from azureml.core import Environment

sklearn_env = Environment.from_conda_specification(name="sklearn-env",file_path="./conda_dependencies.yml")

sklearn_env.register(workspace=ws)
sklearn_env.python.conda_dependencies.serialize_to_string()

'dependencies:\n- python=3.6.9\n- scikit-learn==0.24.1\n- pip:\n  - xgboost==0.90\n  - azureml-defaults\n'

## Hyperdrive Configuration



In [6]:
from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment
import os
from azureml.core.script_run_config import ScriptRunConfig
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice
import shutil

# Specify parameter sampler

param_sampling = RandomParameterSampling(
    {
        '--C' : choice(0.001,0.01,0.1,1,10,20,50,100,200,500,1000),
        '--max_iter': choice(50,100,200,300)
    }
)

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")
shutil.copy('./train.py','./training')


#TODO: Create your estimator and hyperdrive config

# configure and submit your training run
src = ScriptRunConfig(source_directory='./',
                      script='train.py',
                      compute_target=compute_target,
                      environment=sklearn_env)

#Submit job : Run your experiment by submitting your ScriptRunConfig object. Note that this call is asynchronous.
run = experiment.submit(src)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     policy=policy,
                                     hyperparameter_sampling=param_sampling, 
                                     primary_metric_name='Accuracy',
                                     max_duration_minutes=30,
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=12,
                                     max_concurrent_runs=4)

In [10]:
#TODO: Submit your experiment
hyperdrive_run = experiment.submit(hyperdrive_config)

# Monitor HyperDrive runs You can monitor the progress of the runs with the following Jupyter widget
# RunDetails(hyperdrive_run).show()

hyperdrive_run.wait_for_completion(show_output=True)

assert(hyperdrive_run.get_status() == "Completed")

RunId: HD_6493a40e-cfc3-477b-ac06-4d8c0b54e13a
Web View: https://ml.azure.com/runs/HD_6493a40e-cfc3-477b-ac06-4d8c0b54e13a?wsid=/subscriptions/59c23a13-e17f-4934-9c23-d4a6d567d0e0/resourcegroups/ml/workspaces/udacity-projects&tid=dfec9780-9fc1-406c-8a10-3ab1cfee7c36

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-10T01:39:05.746183][API][INFO]Experiment created<END>\n""<START>[2021-04-10T01:39:06.433436][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-04-10T01:39:06.875781][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_6493a40e-cfc3-477b-ac06-4d8c0b54e13a
Web View: https://ml.azure.com/runs/HD_6493a40e-cfc3-477b-ac06-4d8c0b54e13a?wsid=/subscriptions/59c23a13-e17f-4934-9c23-d4a6d567d0e0/resourcegroups/ml/workspaces/udacity-projects&tid=dfec9780-9fc1-406c-8a10-3ab1cfee7c36



## Run Details

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [12]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [13]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n Regularization Strength:',best_run_metrics['Regularization Strength:'])
print('\n Max Iterations:',best_run_metrics['Max iterations:'])

Best Run Id:  HD_6493a40e-cfc3-477b-ac06-4d8c0b54e13a_1

 Accuracy: 0.9333333333333333

 Regularization Strength: 0.001

 Max Iterations: 300


In [47]:
# Code below registers the best model with the information of Metrics
#TODO: Save the best model
from azureml.core import Model

model = best_run.register_model(model_path='outputs/model.pkl',model_name='HyperDrive_HighAccuracy', 
                                properties={'Accuracy': best_run_metrics['Accuracy'],
                                            'Regularization Strength': best_run_metrics['Regularization Strength:'],
                                           'Max Iterations': best_run_metrics['Max iterations:']})

# List registered models
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path outputs/model.pkl in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_bfdb26cc32e8a6e1d70f0157486796878c75c754e3bef00b2745949bc6f3f644_d.txt', 'azureml-logs/65_job_prep-tvmps_bfdb26cc32e8a6e1d70f0157486796878c75c754e3bef00b2745949bc6f3f644_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_bfdb26cc32e8a6e1d70f0157486796878c75c754e3bef00b2745949bc6f3f644_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/106_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/hyperDrive_0.001_300']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path outputs/model.pkl in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_bfdb26cc32e8a6e1d70f0157486796878c75c754e3bef00b2745949bc6f3f644_d.txt', 'azureml-logs/65_job_prep-tvmps_bfdb26cc32e8a6e1d70f0157486796878c75c754e3bef00b2745949bc6f3f644_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_bfdb26cc32e8a6e1d70f0157486796878c75c754e3bef00b2745949bc6f3f644_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/106_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/hyperDrive_0.001_300']\n                See https://aka.ms/run-logging for more details."
    }
}

In [35]:
# Code below registers the best model with the information of Metrics


                                           
best_run.register_model(model_path='outputs/model.pkl', model_name='HyperDrive_HighAccuracy',
                                        properties='Accuracy': best_run_metrics['Accuracy'],
                                                   'Regularization Strength': best_run_metrics['Regularization Strength:'],
                                                    'Max Iterations': best_run_metrics['Max iterations:']})



SyntaxError: invalid syntax (<ipython-input-35-de9d60992c9c>, line 7)