# Hyperparameter Tuning using HyperDrive

- Install the `opendatasets` package so the Kaggle dataset can be downloaded easily
- Load all needed packages
- Prepare workspace and compute cluster

In [15]:
!pip install opendatasets



In [1]:
import os
import shutil

import opendatasets as od
import pandas as pd

from azureml.core import Workspace, Experiment
from azureml.core import Dataset
from azureml.core import Environment, ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.widgets import RunDetails

In [2]:
# Name under which all HyperDrive runs of this notebook are grouped.
experiment_name = 'udacity_capstone_hyperdrive'

# Connect to the workspace described by the local config file and open
# (or create) the experiment inside it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

In [3]:
cluster_name = "IntensePurposeCluster"

# Reuse the cluster when it already exists in the workspace; otherwise
# provision it and block until it is ready.
try:
    compute_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('existing cluster found')
except ComputeTargetException:
    # Only a failed compute-target lookup should trigger provisioning. A bare
    # `except:` would also swallow auth/network errors and KeyboardInterrupt
    # and then try to create a new (billable) cluster.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

    compute_cluster.wait_for_completion(show_output=True)

existing cluster found


## Dataset

We are using a patient dataset on COVID-19 mortality from Kaggle. We want to predict whether a patient will die given their circumstances.

- Check if data is already available and download from kaggle if not
- Read and transform data
- Register dataframe as dataset in the workspace

In [4]:
# Check if the CSV is already present locally; download it only when missing
# so that re-running the notebook does not fetch the data again.
if not os.path.exists('covid19-dataset/Covid Data.csv'):
    # Download the Kaggle covid dataset into the current working directory
    print('downloading data')
    od.download('https://www.kaggle.com/datasets/meirnizri/covid19-dataset/', '.')

In [5]:
# Load the raw Kaggle CSV into a DataFrame.
raw_csv_path = r'./covid19-dataset/Covid Data.csv'
df = pd.read_csv(raw_csv_path)

In [6]:
# Transform the raw columns into model-ready 0/1 features and the target `y`.

# Target: 1 when DATE_DIED holds anything other than the '9999-99-99' value
# (presumably the placeholder for "did not die" — verify against the dataset docs).
df['y'] = df['DATE_DIED'].ne('9999-99-99').astype(int)

# Shift SEX and PATIENT_TYPE down by one
# (assumes the raw coding is 1/2 — TODO confirm).
df['SEX'] = df['SEX'].sub(1)
df['hospitalized'] = df['PATIENT_TYPE'].sub(1)

# Classification codes below 4 are treated as a positive test.
df['tested_positive'] = df['CLASIFFICATION_FINAL'].lt(4).astype(int)

# The source columns are no longer needed once the derived ones exist.
df = df.drop(columns=['DATE_DIED', 'PATIENT_TYPE', 'CLASIFFICATION_FINAL'])

# Indicator columns: keep 1 as 1 and map every other code to 0.
binary_flag_columns = [
    'INTUBED', 'PNEUMONIA', 'PREGNANT', 'DIABETES', 'COPD', 'ASTHMA', 'INMSUPR',
    'HIPERTENSION', 'OTHER_DISEASE', 'CARDIOVASCULAR', 'OBESITY',
    'RENAL_CHRONIC', 'TOBACCO', 'ICU',
]
df[binary_flag_columns] = df[binary_flag_columns].eq(1).astype(int)

In [7]:
# Preview the first nine rows of the transformed frame as a sanity check.
df.head(9)

Unnamed: 0,USMER,MEDICAL_UNIT,SEX,INTUBED,PNEUMONIA,AGE,PREGNANT,DIABETES,COPD,ASTHMA,...,HIPERTENSION,OTHER_DISEASE,CARDIOVASCULAR,OBESITY,RENAL_CHRONIC,TOBACCO,ICU,y,hospitalized,tested_positive
0,2,1,0,0,1,65,0,0,0,0,...,1,0,0,0,0,0,0,1,0,1
1,2,1,1,0,1,72,0,0,0,0,...,1,0,0,1,1,0,0,1,0,0
2,2,1,1,1,0,55,0,1,0,0,...,0,0,0,0,0,0,0,1,1,1
3,2,1,0,0,0,53,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,2,1,1,0,0,68,0,1,0,0,...,1,0,0,0,0,0,0,1,0,1
5,2,1,0,0,1,40,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
6,2,1,0,0,0,64,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
7,2,1,0,0,1,64,0,1,0,0,...,1,0,0,0,1,0,0,0,0,1
8,2,1,0,0,0,37,0,1,0,0,...,1,0,0,1,0,0,0,0,1,1


In [8]:
# Upload the transformed frame to the workspace's default datastore and
# register it as a tabular dataset.
datastore = ws.get_default_datastore()

dataset = Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    target=datastore,
    name='capstone_dataset',
    description='Input dataset (Kaggle covid data) for capstone project',
)

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/b6d2908f-f110-4d1c-883b-1eeb8e0ada62/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


## Hyperdrive Configuration

Here we define the settings for the hyperparameter optimisation.
- Use a simple Bandit policy to stop optimisation early in case of stalling improvements
- Define the search space for random parameter sampling
- Create an experiment folder to store run-specific files in and copy the training script into it
- Define environment for training and configure optimisation run utilising training script

In [9]:
# Utilising a simple Bandit Policy for early termination in case of stagnant performance
early_termination_policy = BanditPolicy(evaluation_interval=3, slack_factor=0.1)

# Defining a search space for the optimal parameters (GradientBoostingTree).
# Values are drawn at random from these distributions for each child run.
param_sampling = RandomParameterSampling({
    'learning_rate': uniform(0.01, 1.0),
    'n_estimators': choice(50, 100, 150, 200, 250, 300, 500),
    "subsample": choice(0.5, 0.9, 1.0),
})

# Create the run directory and ALWAYS refresh the training script inside it.
# Copying only when the directory is first created would leave a stale copy
# in place after covdata_train.py is edited, so later runs would silently
# train with outdated code.
experiment_dir = './' + experiment_name + '/'
os.makedirs(experiment_dir, exist_ok=True)
shutil.copy2('./covdata_train.py', experiment_dir)

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
# Using script run instead of estimator for the hyperdrive run.
# `arguments` stays empty here; the sampled hyperparameters show up as the
# child run's arguments (see the best-run output further down).
src = ScriptRunConfig(source_directory=experiment_dir,
                      script='covdata_train.py',
                      compute_target=compute_cluster,
                      arguments=[],
                      environment=sklearn_env)

# Maximise the 'AUC_weighted' metric over at most 50 child runs, 4 at a time.
# NOTE(review): the training script must log a metric with exactly this name — confirm.
hyperdrive_run_config = HyperDriveConfig(run_config=src,
                                         hyperparameter_sampling=param_sampling,
                                         policy=early_termination_policy,
                                         primary_metric_name='AUC_weighted',
                                         primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                         max_total_runs=50,
                                         max_concurrent_runs=4)

In [10]:
# Submitting the HyperDrive configuration as a run of the experiment
hdr = experiment.submit(config=hyperdrive_run_config)

## Run Details

We test only one model here utilising a SKlearn gradient boosting tree. See details of the run below. 
This cell will only complete once the training run completes such that the entire notebook can be run at once without causing errors.

In [11]:
# Render the run-details widget for the HyperDrive run, then block until the
# run has finished so that the cells below operate on a completed run.
run_details_widget = RunDetails(hdr)
run_details_widget.show()
hdr.wait_for_completion()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_de53d3fa-674d-42d8-a352-1e3e33e7157c',
 'target': 'IntensePurposeCluster',
 'status': 'Completed',
 'startTimeUtc': '2023-03-17T09:27:36.599478Z',
 'endTimeUtc': '2023-03-17T13:09:34.10674Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"AUC_weighted","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'c929b1bb-c775-44ef-bdd2-dff13f706bd5',
  'user_agent': 'python/3.8.5 (Linux-5.15.0-1022-azure-x86_64-with-glibc2.10) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.47.0',
  'space_size': 'infinite_space_size',
  'score': '0.81852177760742',
  'best_child_run_id': 'HD_de53d3fa-674d-42d8-a352-1e3e33e7157c_46',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_de53d3fa-674d-42d8-a352-1e3e33e7157c_46'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  '

## Best Model

The best model according to the primary metric (AUC_weighted) is selected and registered in the workspace.
We complete the run to make sure there is no bad behaviour.

In [12]:
# Explicitly mark the HyperDrive run as complete before reading its results.
hdr.complete()

# Select the child run that scored best on the primary metric and register
# the model file it wrote as a model in the workspace.
best_run = hdr.get_best_run_by_primary_metric()
best_run.register_model(
    model_name='covid_death_pred_hdr',
    model_path='outputs/Hyperdrive_capstone.joblib',
)

Model(workspace=Workspace.create(name='vg-adl-sco-dev-ml', subscription_id='d4b1a742-d22b-4598-975b-f7d380af08da', resource_group='vg-adl-sco-dev-rg'), name=covid_death_pred_hdr, id=covid_death_pred_hdr:6, version=6, tags={}, properties={})

In [13]:
# Show optimal parameters.
# Fetch the run details once and reuse them instead of calling get_details()
# separately for every field.
best_run_details = best_run.get_details()
print('Optimal run_id: ' + best_run_details['runId'])
print('Hyperparameters for optimal Gradient Boosting model: '
      + str(best_run_details['runDefinition']['arguments']))

Optimal run_id: HD_de53d3fa-674d-42d8-a352-1e3e33e7157c_46
Hyperparameters for optimal Gradient Boosting model: ['--learning_rate', '0.9950538418256376', '--n_estimators', '50', '--subsample', '1']


## Model Deployment

Here the model is loaded and prepared for deployment. The imports are done here to separate the deployment part from the training part.
- Load model and workspace
- Define environment and easy entry script for deployment
- Define the deployment config and deploy
- Wait for successful deployment before continuing

In [29]:
from azureml.core.webservice import Webservice, AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model

# Re-load the workspace and look up the registered model by name so this
# section can run independently of the training cells above.
ws = Workspace.from_config()
model = Model(ws, 'covid_death_pred_hdr')

# Same conda specification as used for training; covdata_entry.py is the entry script.
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')
inference_config = InferenceConfig(entry_script="covdata_entry.py", environment=sklearn_env)

# Deploy to Azure Container Instances (ACI) with key-based authentication
# and Application Insights enabled.
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, auth_enabled=True,
                                                       enable_app_insights=True)

web_service = Model.deploy(workspace=ws,
                           name='covidsurvival2',
                           models=[model],
                           inference_config=inference_config,
                           deployment_config=deployment_config)

# Block until the endpoint is up before using it in the cells below.
web_service.wait_for_deployment(show_output=True)
print(f"Scoring URI is : {web_service.scoring_uri}")
# Do NOT print the auth keys: cell output is stored in the notebook file and
# would leak the credentials to anyone who can read it. Later cells obtain the
# key directly through web_service.get_keys().
print("Auth keys are available via web_service.get_keys() (not printed to keep secrets out of the notebook output)")

azureml.core.model:
To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoint-sdk-v2 /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration. 


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2023-01-27 10:50:48+00:00 Creating Container Registry if not exists.
2023-01-27 10:50:48+00:00 Registering the environment.
2023-01-27 10:50:51+00:00 Use the existing image.
2023-01-27 10:50:51+00:00 Generating deployment configuration.
2023-01-27 10:50:52+00:00 Submitting deployment to compute.
2023-01-27 10:50:56+00:00 Checking the status of deployment covidsurvival2..
2023-01-27 10:52:49+00:00 Checking the status of inference endpoint covidsurvival2.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Scoring URI is : http://c81de193-cc2c-426e-b21f-d6edb6a55ce6.southcentralus.azurecontainer.io/score
Auth keys are is : ('<redacted>', '<redacted>')


Send a test request to the deployed service to ensure the endpoint is working correctly.

In [30]:
import json

# First test record: 35-year-old patient with diabetes and obesity, every
# other flag set to 0.
baseline_patient = {
    "USMER": 2,
    "MEDICAL_UNIT": 1,
    "SEX": 1,
    "INTUBED": 0,
    "PNEUMONIA": 0,
    "AGE": 35,
    "PREGNANT": 0,
    "DIABETES": 1,
    "COPD": 0,
    "ASTHMA": 0,
    "INMSUPR": 0,
    "HIPERTENSION": 0,
    "OTHER_DISEASE": 0,
    "CARDIOVASCULAR": 0,
    "OBESITY": 1,
    "RENAL_CHRONIC": 0,
    "TOBACCO": 0,
    "ICU": 0,
    "hospitalized": 0,
    "tested_positive": 0,
}

# Second test record: same fields in the same order, overriding only the
# values that differ from the first record.
severe_patient = {
    **baseline_patient,
    "INTUBED": 1,
    "PNEUMONIA": 1,
    "AGE": 75,
    "ASTHMA": 1,
    "CARDIOVASCULAR": 1,
    "hospitalized": 1,
    "tested_positive": 1,
}

data = {"data": [baseline_patient, severe_patient]}

# Serialise once; the string is both sent to the endpoint in the next cell
# and written to data.json.
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

In [31]:
import requests

# URL for the web service, should be similar to:
# 'http://8530a665-66f3-49c8-a953-b82a2d312917.eastus.azurecontainer.io/score'
scoring_uri = web_service.scoring_uri

# JSON body plus the service's first auth key as a bearer token
# (the service was deployed with auth_enabled=True).
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {web_service.get_keys()[0]}',
}

# Make the request and display the response.
# - timeout: requests waits indefinitely by default, which would hang the
#   notebook if the endpoint never answers.
# - raise_for_status: surface HTTP errors (e.g. 401/500) directly instead of
#   failing later with a confusing JSON decoding error.
resp = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)
resp.raise_for_status()
print(resp.json())


[0, 1]


Get the logs of the deployed service and run the last cell to delete it.

In [32]:
# Print the logs instead of returning them as the cell value: a bare string
# is shown as its repr with escaped "\n", which makes multi-line logs unreadable.
print(web_service.get_logs())



In [None]:
# Delete the deployed web service once testing is finished.
web_service.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.

