# Hyperparameter Tuning using HyperDrive

In [1]:
import os
import pandas as pd
from azureml.core import Dataset, Datastore, Workspace, Experiment
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, loguniform

## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
ws = Workspace.from_config()
experiment_name = 'xgboost_hyperparam_search'

hyperdrive_experiment=Experiment(ws, experiment_name)

print(f"subscription key {ws.subscription_id}")
print(f"resource group {ws.resource_group}")
print(f"workspace name {ws.name}")
hyperdrive_experiment

subscription key f5091c60-1c3c-430f-8d81-d802f6bf2414
resource group aml-quickstarts-134235
workspace name quick-starts-ws-134235


Name,Workspace,Report Page,Docs Page
xgboost_hyperparam_search,quick-starts-ws-134235,Link to Azure Machine Learning studio,Link to Documentation


In [5]:
# Load the datastore
datastore = ws.get_default_datastore()

# Load the Trainind set as a Tabular dataset from the datastore register both the test and the training set
dataset_training = Dataset.Tabular.from_delimited_files(path = [(datastore, ("data/train_set_hyper_clean.csv"))])
dataset_training = dataset_training.register(workspace=ws, name="hyperparam-training-data", description="Hotel Review AutoML Training Data")

dataset_test =  Dataset.Tabular.from_delimited_files(path = [(datastore, ("data/test_set_hyper_clean.csv"))])
dataset_test = dataset_training.register(workspace=ws, name="hyperparam-test-data", description="Hotel Review AutoML Test Data")

In [6]:
print("Test set ID", dataset_test.id)
print("Train set ID", dataset_training.id)

Test set ID 9568be56-a855-4e75-9234-aa15eec3502f
Train set ID 3ca17dae-0ecb-4c90-a589-1c5409b5d77d


## Create a project folder which contains all the scripts required for hyperparameter search

In [9]:
import os
project_folder = './scripts'
# os.makedirs(project_folder, exist_ok=True)

## Define a compute target

In [10]:
## Define a Compute Target for AutoML
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "cpu-cluster-1"
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print("Found existing Compute Target")
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size = "Standard_D2_V2", max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

Found existing Compute Target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### Define a Virtual environment for running the Hyperparameter search

In [11]:
from azureml.core.environment import Environment
from azureml.core.model import Model
from azureml.core.conda_dependencies import CondaDependencies

# Create the environment
myenv = Environment(name="myenv")
conda_dep = CondaDependencies()

# Define the packages needed by the model and scripts
conda_dep.add_conda_package("pandas")
conda_dep.add_conda_package("numpy")
conda_dep.add_conda_package("scikit-learn")
conda_dep.add_conda_package("xgboost")
conda_dep.add_conda_package("scipy")
# You must list azureml-defaults as a pip dependency
conda_dep.add_pip_package("azureml-defaults")

# Adds dependencies to PythonSection of myenv
myenv.python.conda_dependencies=conda_dep

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.

### Run configuration for the hyperparameter search

In [12]:
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder,
                      script='train.py',
                      arguments=['--train-set', dataset_training.id,
                                 '--test-set', dataset_test.id,
                                 '--max-depth', 3, 
                                 '--min-child-weight', 2,
                                 '--gamma',0,
                                 '--subsample', 0.9,
                                 '--colsample-bytree', 0.8,
                                 '--reg-alpha',0.00001,
                                 '--eta',0.2,
                                 '--seed', 42,
                                 '--num-iterations', 20],
                      compute_target=compute_target,
                      environment=myenv)

### Hyperparameter search space

In [13]:
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, loguniform

# Specify parameter sampler grid
parameter_sampling_grid = RandomParameterSampling(
     {
      "--max-depth": choice(3,4,5,6),
      "--min-child-weight": choice(1,2,3,4,5),
      "--colsample-bytree": uniform(0.8, 1.0),
      "--subsample": uniform(0.7, 1.0),
      "--gamma": uniform(0, 0.4),
      "--reg-alpha": loguniform(-5,-1)
     }
)

### Early termination policy

In [14]:
from azureml.train.hyperdrive import BanditPolicy
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

### hyperdrive run config

In [15]:
from azureml.train.hyperdrive import HyperDriveConfig

hyperdrive_config = HyperDriveConfig(run_config=src,
                             hyperparameter_sampling=parameter_sampling_grid,
                             policy=early_termination_policy,
                             primary_metric_name="Accuracy",
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs=12,
                             max_concurrent_runs=4)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [16]:
from azureml.widgets import RunDetails

run_hyper = hyperdrive_experiment.submit(config=hyperdrive_config)
RunDetails(run_hyper).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [17]:
run_hyper.wait_for_completion(show_output=True)

RunId: HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831
Web View: https://ml.azure.com/experiments/xgboost_hyperparam_search/runs/HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831?wsid=/subscriptions/f5091c60-1c3c-430f-8d81-d802f6bf2414/resourcegroups/aml-quickstarts-134235/workspaces/quick-starts-ws-134235

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-10T15:54:05.074227][API][INFO]Experiment created<END>\n""<START>[2021-01-10T15:54:05.883841][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space<END>\n"<START>[2021-01-10T15:54:06.4378683Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2021-01-10T15:54:06.411466][GENERATOR][INFO]Successfully sampled '2' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831
Web View: https://ml.azure.com/experiments/xgboost_hyperparam_search/runs/HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831?wsid=

{'runId': 'HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831',
 'target': 'cpu-cluster-1',
 'status': 'Completed',
 'startTimeUtc': '2021-01-10T15:54:04.858901Z',
 'endTimeUtc': '2021-01-10T16:10:51.652632Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'a4ae1b83-6d0a-43df-a9b6-34e98dd55ab4',
  'score': '0.9178867740361152',
  'best_child_run_id': 'HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg134235.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_a4ce3a4a-1d30-4a82-83a4-55bb58c29831/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=ja%2B8ORspEwvEY3TkT%2Bm%2FzR9lj6szgfe3rbH9%2Bq9Jcms%3D&st=2021-01-10T16%3A01%3A15Z&se=2021-01-11T00%3A11%3A15Z&sp=r'}}

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [18]:
best_run = run_hyper.get_best_run_by_primary_metric()
print(best_run.get_file_names())

['azureml-logs/55_azureml-execution-tvmps_1d6552f10263597440f799e91858f72d2d74ddce125a749b08117dfba613199f_d.txt', 'azureml-logs/65_job_prep-tvmps_1d6552f10263597440f799e91858f72d2d74ddce125a749b08117dfba613199f_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_1d6552f10263597440f799e91858f72d2d74ddce125a749b08117dfba613199f_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/105_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_l_892bec50-b713-4b43-9fb7-28d77576c8e4.jsonl', 'logs/azureml/dataprep/python_span_l_892bec50-b713-4b43-9fb7-28d77576c8e4.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/xgboost_model.pkl']


### We register the best model so we can deploy the model at an inference endpoint

In [19]:
best_model = best_run.register_model(model_name="best-hyperdrive-model", model_path="outputs/xgboost_model.pkl")

## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

### Get some test data and predict whether those reviews are positive, neutral or negative

In [36]:
test_df = dataset_test.to_pandas_dataframe()

test_df_positive = test_df[test_df.norm_rating == 2].drop(columns=['norm_rating'])
test_df_neutral = test_df[test_df.norm_rating == 1].drop(columns=['norm_rating'])
test_df_negative = test_df[test_df.norm_rating == 0].drop(columns=['norm_rating'])

positive_example = [v for c, v in zip(test_df_positive.columns, test_df_positive.iloc[0, :])]
neutral_example = [v for c, v in zip(test_df_neutral.columns, test_df_neutral.iloc[0, :])]
negative_example = [v for c, v in zip(test_df_negative.columns, test_df_negative.iloc[0, :])]

### We deploy the best model as a Webservice 

In [49]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

inference_config = InferenceConfig(entry_script="score.py",
                                   environment=myenv)

In [50]:
service_name = 'xgboost-review-classification'
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[best_model],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)
service.wait_for_deployment(show_output=True)
print("scoring URI: " + service.scoring_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running......................
Succeeded
ACI service creation operation finished, operation "Succeeded"
scoring URI: http://12fa2af6-02f6-4c8a-8ec5-d0dace7fec77.southcentralus.azurecontainer.io/score


### We send a request to the webservice and check the response

In [58]:
import requests
import json


# Add content type header
headers = {'Content-Type':'application/json'}

# Sample data to send to the service
test_sample = json.dumps({'data': [
    positive_example,
    neutral_example,
    negative_example
]})
test_sample = bytes(test_sample, encoding = 'utf8')

# Replace with the URL for your compute instance, as determined from the previous section
service_url = "http://12fa2af6-02f6-4c8a-8ec5-d0dace7fec77.southcentralus.azurecontainer.io/score"
# for a compute instance, the url would be https://vm-name-6789.northcentralus.instances.azureml.net/score
response = requests.post(service_url, test_sample, headers=headers)
print("prediction:", response.text)

prediction: [2.0, 1.0, 0.0]


In [63]:
dir(service)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_cache',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_add_properties',
 '_add_tags',
 '_all_subclasses',
 '_auth',
 '_build_create_payload',
 '_check_for_compute_resource',
 '_check_for_local_deployment',
 '_check_for_webservice',
 '_check_validate_error',
 '_deploy',
 '_deploy_webservice',
 '_expected_payload_keys',
 '_generate_common_validation_payload',
 '_get',
 '_get_deploy_compute_type',
 '_get_operation_state',
 '_initialize',
 '_mms_endpoint',
 '_model_config_map',
 '_operation_endpoint',
 '_refresh_token_time',
 '_remove_tags',
 '_request_validate_resou

### Here the logs of the Web service are printed

In [59]:
service.get_logs()

'2021-01-10T16:32:13,836027600+00:00 - rsyslog/run \n2021-01-10T16:32:13,846090000+00:00 - iot-server/run \n2021-01-10T16:32:13,855473900+00:00 - gunicorn/run \n2021-01-10T16:32:13,860955500+00:00 - nginx/run \n/usr/sbin/nginx: /azureml-envs/azureml_a88f510c1dca01d6feb0dcec55cae0d4/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_a88f510c1dca01d6feb0dcec55cae0d4/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_a88f510c1dca01d6feb0dcec55cae0d4/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_a88f510c1dca01d6feb0dcec55cae0d4/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_a88f510c1dca01d6feb0dcec55cae0d4/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)

### Delete the Webservice

In [64]:
service.delete()

### If you woul like to perform some Local Deployment instead

In [None]:
from azureml.core.webservice import Webservice, LocalWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

deployment_config = LocalWebservice.deploy_configuration(port=6789)

local_service = Model.deploy(workspace=ws, 
                             name='review-xgboost-local', 
                             models=[best_model], 
                             inference_config=inference_config, 
                             deployment_config = deployment_config)

local_service.wait_for_deployment(show_output=True)
print(f"Scoring URI is : {local_service.scoring_uri}")