# Hyperparameter Tuning using HyperDrive

Import all the dependencies

In [None]:
import logging
import os
import json
import csv
import numpy as np
import pandas as pd
import joblib

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig
from azureml.widgets import RunDetails
from azureml.core import Model, Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core.conda_dependencies import CondaDependencies

# Report the Azure ML SDK version string exposed by azureml.core.
print("SDK version:", azureml.core.VERSION)

## Dataset

The dataset is downloaded from an external link inside `train.py`.

In [None]:
# Connect to the workspace described by the saved configuration and get a
# handle on the experiment that groups the HyperDrive runs.
experiment_name = 'hyperdrive-run'
ws = Workspace.from_config()

experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
amlcompute_cluster_name = "cluster-kiemdv1"

# Reuse the named cluster when the lookup succeeds; provision a new one when
# the lookup raises ComputeTargetException.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    provisioning = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning)
else:
    print('Found existing cluster, use it.')

# Runs for both the reused and the newly created cluster.
compute_target.wait_for_completion(show_output=True)

In [None]:
# Build the training environment from the conda_dependencies.yml specification file.
# (SKLearn is already imported in the imports cell at the top of the notebook, so the
# duplicate import that used to live here was dropped.)
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

## Hyperdrive Configuration

Train a LogisticRegression model and use `ps`, a `RandomParameterSampling` object, to sample a different `C` and `max_iter` for each run. A `BanditPolicy` is used to terminate poorly performing runs early.

In [None]:
# Early termination: Bandit policy with a slack factor of 0.1, evaluated every
# 2 intervals, with evaluation delayed for the first 5 intervals.
early_termination_policy = BanditPolicy(
    evaluation_interval=2,
    slack_factor=0.1,
    delay_evaluation=5,
)

# Random sampling over the two command-line arguments handed to train.py.
search_space = {
    '--C': choice(0.01, 0.1, 0.3, 0.6, 0.7, 1.0),
    '--max_iter': choice(range(10, 110, 10)),
}
ps = RandomParameterSampling(search_space)

# Configuration of a single training job: script, compute target and environment.
estimator = ScriptRunConfig(
    source_directory=".",
    script='train.py',
    compute_target=amlcompute_cluster_name,
    environment=sklearn_env,
)

# HyperDrive sweep: maximize 'Accuracy' over at most 24 runs, 4 at a time.
hyperdrive_run_config = HyperDriveConfig(
    run_config=estimator,
    hyperparameter_sampling=ps,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=24,
    max_concurrent_runs=4,
)

In [None]:
# Submit the HyperDrive configuration to the experiment; `run` is the parent run.
run = experiment.submit(
    config=hyperdrive_run_config,
    show_output=True,
)

## Run Details

Use the `RunDetails` widget to monitor the progress of the individual HyperDrive runs.

In [None]:
# Render the monitoring widget for the submitted run.
run_details_widget = RunDetails(run)
run_details_widget.show()

In [None]:
# Wait for the HyperDrive run to complete, with output shown while waiting;
# the call's return value becomes the cell output.
run.wait_for_completion(show_output=True)

In [None]:
# Display the run object as the cell output.
run

## Best Model

Get the best model from the hyperdrive experiments and display all the properties of the model.

In [None]:
# Select the child run with the best value of the primary metric ('Accuracy').
best_run = run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail early with a clear message instead of an AttributeError on None below.
    raise RuntimeError(
        "HyperDrive returned no best run: no child run logged the primary metric 'Accuracy'."
    )

best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print('\n Regularization Strength:',best_run_metrics['Regularization Strength:'])
print('\n Max Iterations:',best_run_metrics['Max iterations:'])
# Previously the file list was computed and silently discarded; show it so the
# model file registered in the next step can be verified.
print('\n Files:', best_run.get_file_names())

## Model Deployment

Remember that you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

Register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the best run's 'outputs/model.pkl' file under the name 'hd-model'.
model = best_run.register_model(
    model_name='hd-model',
    model_path='outputs/model.pkl',
)

In [None]:
# Print the registered model object.
print(model)

In [None]:
# Show the details of the best run as the cell output.
best_run.get_details()

In [None]:
# Reuse the exact environment the best run was trained with for inference.
environment = best_run.get_environment()
# overwrite=True keeps this cell re-runnable: without it the call fails once
# the 'hyper-env' directory exists from a previous execution.
environment.save_to_directory(path='hyper-env', overwrite=True)
entry_script='score.py'

inference_config = InferenceConfig(entry_script = entry_script, environment = environment)

# ACI deployment with key authentication and Application Insights enabled.
deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                                    memory_gb = 1, 
                                                    auth_enabled= True, 
                                                    enable_app_insights= True)

service_name = 'hd-deploy'
# overwrite=True replaces an existing service with the same name.
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       overwrite=True
                      )
service.wait_for_deployment(show_output = True)

Send a request to the web service you deployed to test it.

In [24]:
import requests

scoring_uri = service.scoring_uri
# Never hardcode the service key in the notebook: it leaks a secret and goes
# stale whenever the service is redeployed. The service was deployed with
# auth_enabled=True, so fetch the current primary key from it instead.
key, _ = service.get_keys()

# Two sample records to score.
data = {
    "data": [
        {
            "Pregnancies": 10,
            "Glucose": 120,
            "BloodPressure": 60,
            "SkinThickness": 30,
            "Insulin": 20,
            "BMI": 37,
            "DiabetesPedigreeFunction": 0.513,
            "Age": 35,
        },
        {
            "Pregnancies": 8,
            "Glucose": 91,
            "BloodPressure": 65,
            "SkinThickness": 31,
            "Insulin": 10,
            "BMI": 29,
            "DiabetesPedigreeFunction": 0.402,
            "Age": 30,
        },
    ],
    "method": "predict",
}

input_data = json.dumps(data)
# Keep a copy of the payload on disk.
with open("data.json", "w") as _f:
    _f.write(input_data)

headers = {'Content-Type': 'application/json'}
headers['Authorization'] = f'Bearer {key}'

# A timeout prevents the cell from hanging forever on an unresponsive endpoint;
# raise_for_status surfaces HTTP errors (e.g. 401) before the body is parsed.
resp = requests.post(scoring_uri, input_data, headers=headers, timeout=30)
resp.raise_for_status()
print(resp.json())
print("Case False: Not Diabetes, Case True: Diabetes.")

{"result": [true, false]}
Case False: Not Diabetes, Case True: Diabetes.


Print the logs of the web service and delete the service

In [None]:
# Fetch the web service logs and print them: a bare `logs` expression would
# show the string's repr, with escaped newlines instead of line breaks.
logs = service.get_logs()
print(logs)

In [None]:
# Delete the deployed web service.
service.delete()