## Hyperparameter Tuning using HyperDrive

Here we create an experiment in the workspace. Notice that the configuration for the workspace is obtained from the config.json file.

In [1]:
from azureml.core import Workspace, Experiment, Environment
from azureml.core.authentication import InteractiveLoginAuthentication

# Load the workspace described by config.json and open the experiment in it.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project-1")

# Summarise the workspace, one field per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# Start an interactive run in the experiment.
run = exp.start_logging()

Workspace name: quick-starts-ws-176882
Azure region: southcentralus
Subscription id: 61c5c3f0-6dc7-4ed9-a7f3-c704b20e3b30
Resource group: aml-quickstarts-176882


## Create a CPU Cluster

In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

from azureml.core.compute_target import ComputeTargetException

# Name under which the CPU cluster is looked up (and created if missing).
cpu_cluster_name = "cpucluster"

# Reuse the cluster when the workspace already has one with this name;
# otherwise provision a new one.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # To use a different region for the compute, add a location='<region>' parameter
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Runs for both the reused and the newly created cluster.
cpu_cluster.wait_for_completion(show_output=True)

InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


The model will be trained with this version of scikit-learn, so we need to configure this exact version when deploying; otherwise we'll get an error when loading the model from the file with joblib.

In [23]:
# Imported in this cell so that it also runs on a fresh kernel: the only other
# import of SKLearn is in a later cell, so running top to bottom would
# otherwise raise NameError here.
from azureml.train.sklearn import SKLearn

SKLearn.get_supported_versions()

['0.20.3']

## Hyperdrive Configuration

In [24]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
from azureml.core import ScriptRunConfig

# Random search over the command-line arguments handed to train.py.
# NOTE(review): judging by the metric names logged by the best run, these are
# presumably number of estimators, max depth and min samples split -- confirm
# against train.py.
ps = RandomParameterSampling({
    '--e': choice(500, 1000, 1500, 2000, 3000),
    '--md': choice(10, 50, 100, 150),
    '--msp': choice(2, 5, 7),
})

# Early-termination policy: Bandit with a slack factor of 0.1, evaluated every 3 intervals.
policy = BanditPolicy(evaluation_interval=3, slack_factor=0.1)

# SKLearn estimator that runs train.py from the current directory on the CPU cluster.
est = SKLearn(
    source_directory='.',
    entry_script='train.py',
    compute_target=cpu_cluster,
    conda_packages=['pandas'],
)

# HyperDrive configuration: maximise "Accuracy" over at most 12 runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,
    max_concurrent_runs=4,
)



## Run Details

In [21]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hdr = exp.submit(config=hyperdrive_config)
RunDetails(hdr).show()

# Block until the HyperDrive run has finished. Without this, running the
# notebook top to bottom would query the best run before any child run has
# reported its metric.
hdr.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model

Get the best model from the hyperdrive experiment and register it.

In [6]:
import joblib
# Get your best run and save the model from that run.
best_run = hdr.get_best_run_by_primary_metric()

# No best run is returned when no child run has logged the primary metric
# (for example, the HyperDrive run is still in progress or every child failed).
# Fail here with a clear message instead of an AttributeError on None below.
if best_run is None:
    raise RuntimeError(
        "No child run has reported the primary metric yet; "
        "wait for the HyperDrive run to complete and check the child run logs."
    )

best_run_metrics = best_run.get_metrics()
best_run_metrics

{'Number of estimators': 3000,
 'Max Depth': 100,
 'Min Samples Split': 2,
 'Accuracy': 0.9992679137159137}

In [8]:
import os

# Create the target folder if it is missing; exist_ok makes the cell safe to re-run.
best_model_dir = "./best_model_hyperdrive"
os.makedirs(best_model_dir, exist_ok=True)

# download model
# Pass an explicit file path (not just the folder) so the artifact's local
# location is unambiguous.
best_run.download_file('outputs/model.joblib',
                       output_file_path=os.path.join(best_model_dir, "model.joblib"))

## Deploy the Model

Register the model to be deployed.

In [12]:
# Register the best run's model file as "hd-model", tagged with that run's metrics.
model = best_run.register_model(
    model_name="hd-model",
    model_path="./outputs/model.joblib",
    tags=best_run_metrics,
)

The model was first deployed locally in order to inspect the error logs, and then it was deployed to an Azure Container Instance (ACI).

In [63]:
# create environment for the deploy
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.webservice import AciWebservice

# Dockerfile text for a custom image: creates a conda environment on top of the
# AzureML OpenMPI base image and pip-installs pinned dependencies into it.
# NOTE(review): nothing in this cell applies this string -- the
# `env.docker.base_dockerfile = dockerfile` assignment further down is commented
# out -- so `dockerfile` is currently unused.
# NOTE(review): the environment path is named "sklearn-0.24.1" while the pip
# install pins scikit-learn==0.20.3; confirm which version is intended before
# enabling it.
dockerfile = r"""
FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211221.v1

ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/sklearn-0.24.1

# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
    python=3.7 pip=20.2.4

# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH

# Install pip dependencies
RUN pip install 'matplotlib>=3.3,<3.4' \
                'joblib==1.1.0' \
                'pandas>=1.1,<1.2' \
                'scipy>=1.5,<1.6' \
                'numpy>=1.10,<1.20' \
                'ipykernel~=6.0' \
                'azureml-core==1.37.0.post1' \
                'azureml-defaults==1.37.0' \
                'azureml-mlflow==1.37.0' \
                'azureml-telemetry==1.37.0' \
                'scikit-learn==0.20.3'
                 

# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH
"""

# Custom environment for the scoring container: pip packages added on top of
# the environment's default dependencies.
env = Environment(name="scikit-learn-env-3")
python_packages = ['joblib', 'numpy', 'scikit-learn==0.20.3']
for pip_spec in python_packages:
    env.python.conda_dependencies.add_pip_package(pip_spec)
env.inferencing_stack_version = 'latest'

# Disabled alternative: build the image from the custom `dockerfile` string.
#env.docker.base_image = None
#env.docker.base_dockerfile = dockerfile

# Deployment configuration for the ACI web service: 1 CPU core, 1 GB of memory,
# key authentication and Application Insights enabled.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
    enable_app_insights=True,
    tags={"data": "divorce-ds", "method": "random-forest"},
    description="Predict divorce using random-forest",
)

In [64]:
import uuid
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model
from azureml.core.webservice import LocalWebservice

# Inference configuration: the scoring script together with the environment it runs in.
inference_config = InferenceConfig(environment=env, entry_script="score.py")

# Local web service configuration on port 6790. It is not passed to
# Model.deploy below; the deployment uses the ACI configuration.
local_config = LocalWebservice.deploy_configuration(port=6790)

# Deploy the registered model, replacing any existing service with the same name.
service_name = "random-forest-divorce-ee61"
service = Model.deploy(
    ws,
    service_name,
    [model],
    inference_config=inference_config,
    deployment_config=aciconfig,
    overwrite=True,
)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-01-19 23:28:48+00:00 Creating Container Registry if not exists.
2022-01-19 23:28:48+00:00 Registering the environment.
2022-01-19 23:28:49+00:00 Use the existing image.
2022-01-19 23:28:49+00:00 Generating deployment configuration.
2022-01-19 23:28:50+00:00 Submitting deployment to compute.
2022-01-19 23:28:52+00:00 Checking the status of deployment random-forest-divorce-ee61..
2022-01-19 23:31:24+00:00 Checking the status of inference endpoint random-forest-divorce-ee61.
Succeeded
ACI service creation operation finished, operation "Succeeded"


## Consume API

Create a fake input to predict, obtain the key and then send the request to the API to obtain a predicted value.

In [103]:
import numpy as np
# Request data goes here
# A single sample row holding the value 1 in each of its 54 positions.
data = {"data": [[1] * 54]}

In [66]:
# Fetch the primary and secondary authentication keys of the deployed service.
pr, sec = service.get_keys()

# Display only a masked form (first four characters) so the real key is never
# stored in the saved notebook output.
pr[:4] + '*' * (len(pr) - 4)

'SCZH****************************'

In [70]:
# Print the deployed service's swagger_uri attribute.
print(service.swagger_uri)

http://a8f423b6-6f62-4182-b002-b72171dce330.southcentralus.azurecontainer.io/swagger.json


In [106]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server certificate verification for HTTPS requests.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable
    is unset or empty, and the ``ssl`` module exposes
    ``_create_unverified_context``, that factory is installed as
    ``ssl._create_default_https_context``. Otherwise nothing is changed.

    Args:
        allowed: Truthy to permit the bypass.

    Returns:
        None.
    """
    if not allowed:
        return
    # A non-empty PYTHONHTTPSVERIFY takes precedence: leave verification alone.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        # bypass the server certificate verification on client side
        ssl._create_default_https_context = unverified_factory

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# JSON-encode the request payload as bytes for the POST body.
body = str.encode(json.dumps(data))

# Read the endpoint and key from the deployed service object instead of
# hardcoding them: a key pasted into the notebook is leaked to anyone who can
# read the file, and both values go stale when the service is redeployed.
url = service.scoring_uri
api_key = service.get_keys()[0]
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(req) as response:
        result = json.loads(response.read().decode("utf8", 'ignore'))
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

[0.099]


If we answer 1 to every question in the test, the model predicts that we might not divorce.

In [None]:
# Mark the interactive run started with exp.start_logging() in the first cell
# as completed; otherwise it is left in the "Running" state in the experiment.
run.complete()

# Delete the deployed web service and the compute cluster.
service.delete()
cpu_cluster.delete()