# Hyperparameter Tuning using HyperDrive

* Importing Workspace and Experiment class

In [1]:
from azureml.core import Workspace, Experiment

## Setting up the workspace

* Setting experiment name and associating it to the workspace

In [2]:
ws = Workspace.from_config()

print('WORKSPACE DETAILS:', 'Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

experiment_name = 'heart-disease-hyperdrive'
experiment=Experiment(workspace=ws, name=experiment_name)

run = experiment.start_logging()

WORKSPACE DETAILS:
Workspace name: quick-starts-ws-137068
Azure region: southcentralus
Subscription id: f9d5a085-54dc-4215-9ba6-dad5d86e60a0
Resource group: aml-quickstarts-137068


## Compute Cluster
* Setting up the Compute cluster VM `cpu-cluster` to run the experiment

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Creating compute cluster
# vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes = 4.

cpu_cluster_name = "cpu-cluster"

# to check whether the compute cluster exists already or not
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print("Existing compute target found... Using it")

except ComputeTargetException:
    print("Creating new Compute Target... " + cpu_cluster_name)
    provisioning_compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_compute_config)

cpu_cluster.wait_for_completion(show_output=True)

Creating new Compute Target... cpu-cluster
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Datset Preview
* Uploading the dataset from the given URL and reviewing the DataFrame

In [4]:
from azureml.data.dataset_factory import TabularDatasetFactory
import pandas as pd

url_path = "https://raw.githubusercontent.com/bharati-21/AZMLND-Capstone-Project/master/files/heart.csv"
# ds = TabularDatasetFactory.from_delimited_files(path=url_path)

df = pd.read_csv(url_path)

In [5]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Hyperdrive Configuration

1. Choosing `BanditPolicy` as the terminating policy to terminate low performing runs
1. Choosing `RandomParameterSampling` to randomly search the search space and optimize hyperparameter tuning
1. Using an `SKLearn Estimator` to invoke and begin the training script
1. Using HyperDriveConfig class to set up configuration object for the HyperDrive experiment.
   1. Using metic as `accuracy` with metric goal as `maximize`, `max_total_runs=20` and `max_concurrent_runs=4`

In [6]:
from azureml.widgets import RunDetails
import shutil
from azureml.train.sklearn import SKLearn
from azureml.core import ScriptRunConfig, Experiment
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import BayesianParameterSampling
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.train.hyperdrive import HyperDriveConfig
import os


# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=5)

# TODO: Create the different params that you will be using during training
param_sampling = RandomParameterSampling({
    "--C" : uniform(0.01, 1),
    "--max_iter" : choice(10, 50, 100, 150, 200),
})

#TODO: Create your estimator and hyperdrive config
if "training" not in os.listdir():
    os.mkdir("./training")

estimator = SKLearn (
    source_directory= os.path.join("./"),
    compute_target= cpu_cluster,
    entry_script= "train.py"
)

hyperdrive_run_config = HyperDriveConfig (
        estimator = estimator, 
        hyperparameter_sampling = param_sampling, 
        policy = early_termination_policy,
        primary_metric_name = 'accuracy', 
        primary_metric_goal = PrimaryMetricGoal.MAXIMIZE, 
        max_total_runs = 20,
        max_concurrent_runs = 4
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [7]:
#TODO: Submit your experiment

run = experiment.submit(hyperdrive_run_config)



## Run Details

* Many models were trained with different hyperparameter values. Varying accuracy can be observed because of different hyperparameters tuned from the given search space

In [8]:
RunDetails(run).show()
run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_83438b92-fe6f-4819-9870-97e8a3d893f9
Web View: https://ml.azure.com/experiments/heart-disease-hyperdrive/runs/HD_83438b92-fe6f-4819-9870-97e8a3d893f9?wsid=/subscriptions/f9d5a085-54dc-4215-9ba6-dad5d86e60a0/resourcegroups/aml-quickstarts-137068/workspaces/quick-starts-ws-137068

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-03T10:41:46.033117][API][INFO]Experiment created<END>\n""<START>[2021-02-03T10:41:46.573282][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-02-03T10:41:46.885208][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-02-03T10:41:47.2612413Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_83438b92-fe6f-4819-9870-97e8a3d893f9
Web View: https://ml.azure.com/experiments/heart-disease-hyperdrive/runs/HD_83438b92-fe6f-4819-9870-97e8a3d893f9?wsid=/s

{'runId': 'HD_83438b92-fe6f-4819-9870-97e8a3d893f9',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-02-03T10:41:45.310955Z',
 'endTimeUtc': '2021-02-03T10:54:08.199486Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '397635d7-5140-4dde-94fa-300fbfe17cdd',
  'score': '0.8852459016393442',
  'best_child_run_id': 'HD_83438b92-fe6f-4819-9870-97e8a3d893f9_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg137068.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_83438b92-fe6f-4819-9870-97e8a3d893f9/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=hlnVeEna3yOJr5XmP5K2davTnJmOFuW%2BfrwTDNApz0I%3D&st=2021-02-03T10%3A44%3A22Z&se=2021-02-03T18%3A54%3A22Z&sp=r'},
 'submittedBy': 'ODL_User 137068'}

## Best Model

* The best model obtained was a Logistic Regression algorithm with an accuracy of `0.88525`, `--max_iter=200` and `--C=0.8751` 

In [9]:
import joblib
# Get your best run and save the model from that run.

best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']
print('Best Run ID: ', best_run.id)
print('Best Run Accuracy: ', best_run_metrics['accuracy'])
print('Best Run Learning Rate: ', parameter_values[3])
print('Parameter Values: ', parameter_values)

Best Run ID:  HD_83438b92-fe6f-4819-9870-97e8a3d893f9_1
Best Run Accuracy:  0.8852459016393442
Best Run Learning Rate:  200
Parameter Values:  ['--C', '0.8750515086805049', '--max_iter', '200']


In [18]:
best_run.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_f5fb4de558d76f72eaed1a185e229324af0d024d50c6249a6b27d8ba97311800_d.txt',
 'azureml-logs/65_job_prep-tvmps_f5fb4de558d76f72eaed1a185e229324af0d024d50c6249a6b27d8ba97311800_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_f5fb4de558d76f72eaed1a185e229324af0d024d50c6249a6b27d8ba97311800_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/104_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/hyperdrive-heart-disease.pkl']

In [24]:
import joblib
from azureml.core.model import Model

#TODO: Save the best model
description = 'Model trained on the Heart Disease UCI Machine Learning Dataset from Kaggle' 

registered_model = best_run.register_model(model_name = 'hyperdrive-heart-disease',
                                             model_path = './outputs/hyperdrive-heart-disease.pkl',
                                             model_framework = Model.Framework.SCIKITLEARN,
                                             model_framework_version = '0.22.2',
                                             description = description)

print("Model successfully saved.")

Model successfully saved.


In [26]:
registered_model

Model(workspace=Workspace.create(name='quick-starts-ws-137068', subscription_id='f9d5a085-54dc-4215-9ba6-dad5d86e60a0', resource_group='aml-quickstarts-137068'), name=hyperdrive-heart-disease, id=hyperdrive-heart-disease:1, version=1, tags={}, properties={})

In [27]:
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
heart-disease-hyperdrive,HD_83438b92-fe6f-4819-9870-97e8a3d893f9_1,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


## Model Deployment

1. Creating a `score.py` file to load the best trained model.
1. Specifying dependenices.
1. Defining an inference configuration to set up the web-service containing the model, which is used later while deploying the model.
1. Choosing a compute target and deployment configuration to host the model.
1. Deploying the machine learning model

In [28]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


* Score.py Has two functions, init() and run(input_data).
    - init() function loads the model into a global object. This function is executed only once when the Docker container is started.
    - run(input_data) function is used to predict a value based on the input data.

In [29]:
%%writefile score.py

import json
import numpy as np
import os
import pickle
# import sklearn.external.joblib as extjoblib
import joblib
# from sklearn.linear_model import LogisticRegression

from azureml.core.model import Model


def init():
    global model
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'hyperdrive-heart-disease.pkl')
    model = joblib.load(model_path)

    
def run(data):
    try: 
        data = np.array(json.loads(data))
        result = model.predict(data)
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
    

Overwriting score.py


In [30]:
from azureml.core.environment import Environment

myenv = Environment(name="myenv")
myenv.register(workspace=ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20210104.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "myenv",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"
      

In [31]:
from azureml.core.runconfig import CondaDependencies

cd = CondaDependencies.create()
cd.add_conda_package('numpy')
cd.add_conda_package("scikit-learn")

env = Environment.get(ws, "myenv").clone('new_myenv')

for pip_package in ["scikit-learn"]:
    env.python.conda_dependencies.add_pip_package(pip_package)


cd.add_pip_package("azureml-defaults")
cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')


print(cd.serialize_to_string())

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
  - azureml-defaults
- numpy
- scikit-learn
channels:
- anaconda
- conda-forge



* Choosing a compute target and deployment configuration to host the model. `Azure Conatiner Service` is used to deploy the model.

In [32]:
from azureml.core.webservice import AciWebservice

aci_config = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               description='Predict if a person has Heart disease or not',
                                               auth_enabled = True
)                                


* Defining an inference configuration to set up the web-service containing the model, which is used later while deploying the model.
* The model is then deployed by passing workspace, model, inference config and deployment config as parameters using `Model.deploy()`

In [36]:
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.automl.core.shared import constants

model = Model(ws, 'hyperdrive-heart-disease')


myenv = Environment.from_conda_specification(name="myenv", file_path="myenv.yml")
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

service = Model.deploy(workspace=ws, 
                       name='heart-disease-hyperdrive', 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config=aci_config)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.....................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


* State of the deployed web service

In [39]:
service.state

'Healthy'

* Get logs to view the deployment overview

In [40]:
print(service.get_logs())

2021-02-03T12:01:41,385302639+00:00 - rsyslog/run 
/usr/sbin/nginx: /azureml-envs/azureml_5891abd828725e6d4f7b48940d1f8d13/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_5891abd828725e6d4f7b48940d1f8d13/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_5891abd828725e6d4f7b48940d1f8d13/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_5891abd828725e6d4f7b48940d1f8d13/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_5891abd828725e6d4f7b48940d1f8d13/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2021-02-03T12:01:41,386860844+00:00 - gunicorn/run 
2021-02-03T12:01:41,386930345+00:00 - nginx/run 
2021-02-03T12:01:41,388398950+00:00 - iot-server/run 
EdgeHubC

* Enabling app insights for logging

In [42]:
service.update(enable_app_insights=True)

In [43]:
service

AciWebservice(workspace=Workspace.create(name='quick-starts-ws-137068', subscription_id='f9d5a085-54dc-4215-9ba6-dad5d86e60a0', resource_group='aml-quickstarts-137068'), name=heart-disease-hyperdrive, image_id=None, compute_type=None, state=ACI, scoring_uri=Healthy, tags=http://470f5572-e98b-468b-8a0e-d0e249caed1a.southcentralus.azurecontainer.io/score, properties={}, created_by={'azureml.git.repository_uri': 'https://github.com/bharati-21/AZMLND-Capstone-Project.git', 'mlflow.source.git.repoURL': 'https://github.com/bharati-21/AZMLND-Capstone-Project.git', 'azureml.git.branch': 'master', 'mlflow.source.git.branch': 'master', 'azureml.git.commit': 'a3fa74d4a26239aa160550347efde18d98d6fea5', 'mlflow.source.git.commit': 'a3fa74d4a26239aa160550347efde18d98d6fea5', 'azureml.git.dirty': 'True', 'hasInferenceSchema': 'False', 'hasHttps': 'False'})

In [44]:
print(service.scoring_uri)

http://470f5572-e98b-468b-8a0e-d0e249caed1a.southcentralus.azurecontainer.io/score


In [45]:
print(service.swagger_uri)

http://470f5572-e98b-468b-8a0e-d0e249caed1a.southcentralus.azurecontainer.io/swagger.json


In [47]:
from azureml.data.dataset_factory import TabularDatasetFactory
from train import clean_data
from sklearn.model_selection import train_test_split
import pandas as pd

url_path = "https://raw.githubusercontent.com/bharati-21/AZMLND-Capstone-Project/master/files/heart.csv"
ds = TabularDatasetFactory.from_delimited_files(path=url_path)


# Use the clean_data function to clean your data.
x, y = clean_data(ds)

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size = 0.2)

Shape of dataset before split: (302, 21)


In [112]:
# source: https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb
import json
import matplotlib.pyplot as plt
import numpy as np 

data = [[67.0, 120.0, 229.0, 129.0, 2.6, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]]
test_samples = json.dumps(data)

result = service.run(input_data=test_samples)

print(result)

[0]


* Sending a HTTP request by randomly picking up a row from test data and invoking `post()` which takes scoring uri and input data in JSON format to predict

In [161]:
import requests
import json

# URL for the web service, should be similar to:
scoring_uri = 'http://470f5572-e98b-468b-8a0e-d0e249caed1a.southcentralus.azurecontainer.io/score'
key = 'NtorvXegdH9EAhMDhbhlhVzmH0Z9XskK'

# Set the content type
headers = {'Content-Type': 'application/json'}

# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'


random_index = np.random.randint(0, len(test_x)-1)
input_data =  [list(test_x.iloc[random_index])] 
input_data = json.dumps(input_data)

print(input_data)
print("\nActual Value:", test_y.iloc[random_index])
response = requests.post(scoring_uri, input_data, headers=headers)
print("Predicted Value:", response.text)

[[58.0, 120.0, 340.0, 172.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]]

Actual Value: 1
Predicted Value: [1]


* Deleting the service and cluster

In [163]:
service.delete()

In [164]:
cpu_cluster.delete()