# Hyperparameter Tuning using HyperDrive

In [5]:
#Import dependencies
import os
import numpy as np
import pandas as pd
import joblib
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform,normal
from azureml.core import ScriptRunConfig
from azureml.core import Environment

## Set-up
The dataset is loaded inside the `train.py` training script.

In [6]:
# Connect to the workspace described by the local Azure ML config file
# and echo its identifying fields, one per line.
ws = Workspace.from_config()
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

# Directory handed to the estimator as its source directory; created if missing.
project_folder = './'
os.makedirs(project_folder, exist_ok=True)

# Experiment that the HyperDrive run is submitted under.
experiment_name = 'mushroom-classification-hyperdrive-final'
experiment = Experiment(workspace=ws, name=experiment_name)

# Bare last expression so the notebook renders the experiment summary.
experiment

sandbox-main
main-workspace
westus2
095d396f-a6aa-423a-90bb-d2146baff999


Name,Workspace,Report Page,Docs Page
mushroom-classification-hyperdrive-final,sandbox-main,Link to Azure Machine Learning studio,Link to Documentation


## Create / Attach Compute Cluster

In [7]:
amlcompute_cluster_name = "default-ds2"

# Reuse the cluster if it already exists in the workspace; otherwise provision it.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # 'STANDARD_DS2_V2' is the SKU the existing cluster reports in its status;
    # the previous value 'STANDARD_DS_V2' is not a valid Azure VM size, so this
    # branch would have failed on a workspace without the cluster.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS2_V2',
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

# Wait until provisioning finishes, then print the cluster status.
compute_target.wait_for_completion(show_output=True)
print(compute_target.get_status().serialize())

Found existing cluster, use it.

Running
{'errors': [], 'creationTime': '2021-03-01T23:09:59.409359+00:00', 'createdBy': {'userObjectId': '616470c5-2ea0-40e6-bcb4-c19a2983c934', 'userTenantId': '891fb020-7b98-4908-8912-9521f813ddf0', 'userName': None}, 'modifiedTime': '2021-03-01T23:12:30.874963+00:00', 'state': 'Running', 'vmSize': 'STANDARD_DS2_V2'}


## Hyperdrive Configuration

For this hyperdrive run I am using a Logistic Regression model, the BanditPolicy as my early termination policy, RandomParameterSampling as my parameter sampling method and SKLearn as my estimator.

I chose the logistic regression model because I label encoded my data and then scaled it using StandardScaler which gave me continuous values for a lot of my features. I considered using One-Hot Encoding and using a KNN model as well but upon seeing the results of the AutoML run, I decided that the label encoding / logistic regression route seemed a little more interesting to compare against my AutoML results.

I am using BanditPolicy because it is the most cost-effective of the different policies but still returns solid results.

I am using Random Parameter Sampling because it supports early termination of low-performing runs, which saves time and compute, and it's well suited to initial searches.

I am tuning the learning rate and keep probability. I chose these ranges because they gave me the best accuracy: I tried raising and lowering the bounds of both hyperparameters, but accuracy was highest with these values.

In [23]:
# Early termination: Bandit policy with a slack factor of 0.1,
# applied at an evaluation interval of 2.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Search space sampled at random for each child run. The sampled values reach
# train.py as the --learning_rate and --keep_probability arguments.
hyperparameter_space = {
    'learning_rate': normal(10, 3),
    'keep_probability': uniform(0.05, 0.1),
}
param_sampling = RandomParameterSampling(hyperparameter_space)

# Estimator that runs train.py from the project folder on the compute cluster.
estimator = SKLearn(
    source_directory=project_folder,
    entry_script='train.py',
    compute_target=compute_target,
)

# HyperDrive sweep: maximize the 'Accuracy' metric over at most 12 child runs,
# with up to 4 running concurrently.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,
    max_concurrent_runs=4,
)



In [24]:
# Submit the HyperDrive configuration to the experiment.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

# Show the monitoring widget for the run.
RunDetails(hyperdrive_run).show()

# Block until the run finishes, streaming its output into the notebook.
hyperdrive_run.wait_for_completion(show_output=True)

# Stop here unless the run reached the "Completed" status.
assert hyperdrive_run.get_status() == "Completed"



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_2ba09911-6ecc-41e8-ad78-715eaf5e9453
Web View: https://ml.azure.com/experiments/mushroom-classification-hyperdrive-final/runs/HD_2ba09911-6ecc-41e8-ad78-715eaf5e9453?wsid=/subscriptions/095d396f-a6aa-423a-90bb-d2146baff999/resourcegroups/main-workspace/workspaces/sandbox-main

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-19T23:54:40.092304][API][INFO]Experiment created<END>\n""<START>[2021-04-19T23:54:40.636754][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-04-19T23:54:40.798106][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_2ba09911-6ecc-41e8-ad78-715eaf5e9453
Web View: https://ml.azure.com/experiments/mushroom-classification-hyperdrive-final/runs/HD_2ba09911-6ecc-41e8-ad78-715eaf5e9453?wsid=/subscriptions/095d396f-a6aa-423a-90bb-d2146baff999/resourcegroups/main-workspace/workspaces/sandbox-main



## Run Details

In [26]:
# Render the run-details widget for the HyperDrive run.
run_details_widget = RunDetails(hyperdrive_run)
run_details_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model

In [27]:
# Retrieve the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# get_best_run_by_primary_metric() returns None when no child run logged the
# primary metric; fail with an actionable message instead of an AttributeError
# on the lines below.
if best_run is None:
    raise RuntimeError(
        "HyperDrive found no child run that logged the primary metric; "
        "check that train.py logs 'Accuracy'."
    )

# Show the command-line arguments the best run was launched with
# and the files it produced.
print(best_run.get_details()['runDefinition']['arguments'])
print(best_run.get_file_names())

['--keep_probability', '0.056490916958824994', '--learning_rate', '9.839861085025014']
['azureml-logs/55_azureml-execution-tvmps_e6df162a7f0918ee4c41f8db14f4df70f8723e1cb91bad860b77c184a9683636_d.txt', 'azureml-logs/65_job_prep-tvmps_e6df162a7f0918ee4c41f8db14f4df70f8723e1cb91bad860b77c184a9683636_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_e6df162a7f0918ee4c41f8db14f4df70f8723e1cb91bad860b77c184a9683636_d.txt', 'logs/azureml/99_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.joblib']


In [28]:
# Register the best run's 'outputs/model.joblib' file as a model
# named 'hyperdrive-train'.
model = best_run.register_model(
    model_name='hyperdrive-train',
    model_path='outputs/model.joblib',
)