# Hyperparameter Tuning using HyperDrive

In [1]:
import os
import pandas as pd
from azureml.core import Dataset, Datastore, Workspace, Experiment
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, loguniform

## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
# Connect to the workspace via Workspace.from_config() and open the experiment
# that will hold every HyperDrive run submitted from this notebook.
experiment_name = 'xgboost_hyperparam_search'
ws = Workspace.from_config()
hyperdrive_experiment = Experiment(workspace=ws, name=experiment_name)

# Echo the workspace coordinates so the reader can confirm the right one is in use.
for summary_line in (
    f"subscription key {ws.subscription_id}",
    f"resource group {ws.resource_group}",
    f"workspace name {ws.name}",
):
    print(summary_line)

# Last expression of the cell: rich display of the experiment.
hyperdrive_experiment

subscription key f9d5a085-54dc-4215-9ba6-dad5d86e60a0
resource group aml-quickstarts-134076
workspace name quick-starts-ws-134076


Name,Workspace,Report Page,Docs Page
xgboost_hyperparam_search,quick-starts-ws-134076,Link to Azure Machine Learning studio,Link to Documentation


In [72]:
# Use the workspace's default datastore; the two CSV files referenced below are
# expected to be present there already.
datastore = ws.get_default_datastore()

# Build a Tabular dataset for the training split from its CSV on the datastore
# and register it in the workspace so its ID can be passed to the training script.
dataset_training = Dataset.Tabular.from_delimited_files(path=[(datastore, "data/train_set_hyper.csv")])
dataset_training = dataset_training.register(workspace=ws, name="hyperparam-training-data", description="Hotel Review AutoML Training Data")

# Same for the test split. Register the test dataset itself (not the training
# dataset) so that the two registered IDs refer to different data.
dataset_test = Dataset.Tabular.from_delimited_files(path=[(datastore, "data/test_set_hyper.csv")])
dataset_test = dataset_test.register(workspace=ws, name="hyperparam-test-data", description="Hotel Review AutoML Test Data")

In [74]:
# Show the IDs of both registered datasets; these are the values later passed
# to train.py through --test-set / --train-set.
for label, registered in (("Test set ID", dataset_test), ("Train set ID", dataset_training)):
    print(label, registered.id)

'6ed6e861-458b-4554-8692-7119175c3230'

## Create a project folder which contains all the scripts required for hyperparameter search

In [28]:
import os

# Folder that holds train.py and any helper scripts; it is handed to
# ScriptRunConfig as the run's source directory.
project_folder = './scripts'
# Create the folder when it is missing; exist_ok=True keeps the cell safe to re-run.
os.makedirs(project_folder, exist_ok=True)

## Define a compute target

In [5]:
## Define a Compute Target for the HyperDrive runs
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "cpu-cluster-1"
try:
    # Look the cluster up by name; raises ComputeTargetException when it does not exist.
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # No such cluster yet: provision one that can scale up to 4 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2_V2",
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print("Found existing Compute Target")

# Wait until the cluster is ready, streaming the provisioning state.
compute_target.wait_for_completion(show_output=True)

Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### Define a Virtual environment for running the Hyperparameter search

In [6]:
from azureml.core.environment import Environment
from azureml.core.model import Model
from azureml.core.conda_dependencies import CondaDependencies

# Collect the conda packages needed by the model and the scripts.
conda_dep = CondaDependencies()
for conda_package in ("pandas", "numpy", "scikit-learn", "xgboost", "scipy"):
    conda_dep.add_conda_package(conda_package)

# azureml-defaults must be listed as a pip dependency.
conda_dep.add_pip_package("azureml-defaults")

# Create the environment and attach the dependencies to its Python section.
myenv = Environment(name="myenv")
myenv.python.conda_dependencies = conda_dep

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

### Run configuration for the hyperparameter search

In [78]:
from azureml.core import ScriptRunConfig

# Baseline command line for train.py as (flag, value) pairs: the two registered
# dataset IDs followed by fixed XGBoost settings.
# NOTE(review): several of these flags are also sampled by the HyperDrive search
# space; presumably the sampled value overrides the baseline one - confirm
# against the argument parsing in train.py.
baseline_flags = [
    ('--train-set', dataset_training.id),
    ('--test-set', dataset_test.id),
    ('--max-depth', 3),
    ('--min-child-weight', 2),
    ('--gamma', 0),
    ('--subsample', 0.9),
    ('--colsample-bytree', 0.8),
    ('--reg-alpha', 0.00001),
    ('--eta', 0.2),
    ('--seed', 42),
    ('--num-iterations', 20),
]
# Flatten the pairs into the flat [flag, value, flag, value, ...] list.
train_arguments = [token for pair in baseline_flags for token in pair]

src = ScriptRunConfig(
    source_directory=project_folder,
    script='train.py',
    arguments=train_arguments,
    compute_target=compute_target,
    environment=myenv,
)

### Hyperparameter search space

In [79]:
from azureml.train.hyperdrive.parameter_expressions import uniform, choice, loguniform

# Search space: discrete choices for the tree-shape parameters, continuous
# ranges for the sampling ratios and regularisation terms.
# NOTE(review): per the Azure ML docs, loguniform(a, b) draws exp(uniform(a, b)),
# so (-5, -1) would span roughly 0.0067 to 0.37. The baseline --reg-alpha is 1e-5,
# which suggests base-10 bounds may have been intended - TODO confirm.
search_space = {
    "--max-depth": choice(3, 4, 5, 6),
    "--min-child-weight": choice(1, 2, 3, 4, 5),
    "--colsample-bytree": uniform(0.8, 1.0),
    "--subsample": uniform(0.7, 1.0),
    "--gamma": uniform(0, 0.4),
    "--reg-alpha": loguniform(-5, -1),
}

# Despite the variable name, this is random sampling rather than a grid search.
parameter_sampling_grid = RandomParameterSampling(search_space)

### Early termination policy

In [80]:
from azureml.train.hyperdrive import BanditPolicy

# Bandit early termination: applied every 2nd evaluation interval, with a
# slack factor of 0.1 relative to the best run so far.
early_termination_policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=2,
)

### HyperDrive run config

In [81]:
from azureml.train.hyperdrive import HyperDriveConfig

# Gather the sweep settings in one place, then build the config from them.
# NOTE(review): "Accuracy" must match the metric name that train.py logs - confirm.
hyperdrive_settings = dict(
    run_config=src,
    hyperparameter_sampling=parameter_sampling_grid,
    policy=early_termination_policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,       # total number of child runs in the sweep
    max_concurrent_runs=2,   # child runs executing at the same time
)
hyperdrive_config = HyperDriveConfig(**hyperdrive_settings)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [82]:
from azureml.widgets import RunDetails

# Submit the HyperDrive configuration to the experiment, then show the
# RunDetails widget for the resulting run.
run_hyper = hyperdrive_experiment.submit(config=hyperdrive_config)
run_widget = RunDetails(run_hyper)
run_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [None]:
# Block until the HyperDrive run finishes, streaming its output into the cell
# (show_output=True), so the cells below only run once results exist.
run_hyper.wait_for_completion(show_output=True)

RunId: HD_ab6a049d-63ea-49c9-a245-f2aaaefcd10b
Web View: https://ml.azure.com/experiments/xgboost_hyperparam_search/runs/HD_ab6a049d-63ea-49c9-a245-f2aaaefcd10b?wsid=/subscriptions/f9d5a085-54dc-4215-9ba6-dad5d86e60a0/resourcegroups/aml-quickstarts-134076/workspaces/quick-starts-ws-134076

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-09T14:49:58.079674][API][INFO]Experiment created<END>\n""<START>[2021-01-09T14:49:58.826594][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space<END>\n""<START>[2021-01-09T14:49:58.979250][GENERATOR][INFO]Successfully sampled '2' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-09T14:49:59.2953774Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>


## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Pick the child run with the best value of the primary metric.
best_run = run_hyper.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with a clear message instead of an AttributeError on the lines below.
    raise RuntimeError(
        "HyperDrive returned no best run: no child run reported the primary metric."
    )

# Display the properties of the best run: its ID, the logged metrics and the
# command-line arguments (i.e. the sampled hyperparameters) it was started with.
print("Best run ID:", best_run.id)
print("Metrics:", best_run.get_metrics())
print("Arguments:", best_run.get_details()['runDefinition']['arguments'])

# List the files stored with the run, which shows where the model file is.
print(best_run.get_file_names())

In [None]:
# Register the best run's "outputs/model.pkl" file in the workspace under the
# name "best-hyperdrive-model"; the returned object is used by the deployment cells.
# NOTE(review): assumes train.py saves the model at outputs/model.pkl - verify
# against the file list printed for the best run.
best_model = best_run.register_model(model_name="best-hyperdrive-model", model_path="outputs/model.pkl")

In [None]:
#TODO: Save the best model

## Model Deployment

Remember that you only have to deploy one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
from azureml.core.webservice import LocalWebservice, Webservice
from azureml.core.model import InferenceConfig, Model

# Scoring entry point plus the environment the service runs in.
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

# Local deployment configuration; the service listens on port 6789.
deployment_config = LocalWebservice.deploy_configuration(port=6789)

local_service = Model.deploy(workspace=ws,
                             name='review-xgboost-local',
                             models=[best_model],
                             inference_config=inference_config,
                             deployment_config=deployment_config)

# Wait for the deployment to finish, then report where the service can be reached.
local_service.wait_for_deployment(show_output=True)
print(f"Scoring URI is : {local_service.scoring_uri}")

In [None]:
import json

import requests

# Feature rows to score.
# TODO: fill in one or more rows matching the input schema expected by score.py;
# the expectation printed below ('2') refers to the sample that belongs here.
example_data = []

input_data = {"data": example_data}

headers = {'Content-Type': 'application/json'}

scoring_uri = "http://localhost:6789/score"
# Serialise the payload as JSON so the body matches the declared content type
# (passing the dict directly would send it form-encoded).
resp = requests.post(scoring_uri, data=json.dumps(input_data), headers=headers)

print("Should be predicted as '2'")
print("prediction:", resp.text)

In [None]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

# Inference configuration for the ACI deployment: score.py is the entry script
# and the service runs in the `myenv` environment.
inference_config = InferenceConfig(environment=myenv, entry_script="score.py")

In [None]:
# Deploy the registered model as an Azure Container Instances web service
# with 1 CPU core and 1 GB of memory.
service_name = 'xgboost-review-classification'
aci_config = AciWebservice.deploy_configuration(memory_gb=1, cpu_cores=1)

# overwrite=True is passed so a service with the same name can be replaced.
service = Model.deploy(
    ws,
    service_name,
    [best_model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Wait for the deployment to complete, then report the scoring endpoint.
service.wait_for_deployment(show_output=True)
print("scoring URI: " + service.scoring_uri)

TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
import json

import requests

# Request headers: the payload is JSON.
headers = {'Content-Type': 'application/json'}
# Only send credentials when the service requires them, and then use the
# service's own key. The user's interactive Azure login token is not needed
# for the scoring endpoint and should not be sent to it.
if getattr(service, "auth_enabled", False):
    primary_key = service.get_keys()[0]
    headers['Authorization'] = f'Bearer {primary_key}'

# Sample data to send to the service
# NOTE(review): two rows of 10 numeric features - confirm this matches the
# input schema expected by score.py.
test_sample = json.dumps({'data': [
    [1,2,3,4,5,6,7,8,9,10],
    [10,9,8,7,6,5,4,3,2,1]
]})
test_sample = bytes(test_sample, encoding = 'utf8')

# Use the deployed service's scoring URI (the same attribute printed after deployment).
service_url = service.scoring_uri
response = requests.post(service_url, test_sample, headers=headers)
print("prediction:", response.text)

TODO: In the cell below, print the logs of the web service and delete the service