# Hyperparameter Tuning using HyperDrive

Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [8]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

#!pip install azureml-sdk==1.19.0

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)


SDK version: 1.20.0


In [9]:
from azureml.core.compute import ComputeTarget, AmlCompute

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

MYcompute_cluster = "pimadiabetes"

# Verify that cluster does not exist already
try:
    aml_compute = ComputeTarget(workspace=ws, name=MYcompute_cluster)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    aml_compute = ComputeTarget.create(ws, MYcompute_cluster, compute_config)

aml_compute.wait_for_completion(show_output=True)

trn-aue-aa-ml
trn-aue-advanced-analytics-ml
australiaeast
8a086c9c-5530-4791-89af-62c8cdfda3fd
Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

Get data. Reference to datahub.io dataset on pima women diabetes. https://datahub.io/machine-learning/diabetes/r/diabetes.csv

In [10]:
# choose a name for experiment
experiment_name = 'automlpimadiabetesps'
project_folder = './Augusto_Avila/capstone'

experiment=Experiment(ws, experiment_name)
ds = Dataset.get_by_name(ws, name='pimadiabetes')

## Hyperdrive Configuration - Parameter Sampling

Using a classification model and a random sampling hyperdrive configuration. Bandit policy chosed for termination policy with a slack factor of 30% based
on the results of the previous automl run.

In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive import RandomParameterSampling
from azureml.train.hyperdrive import normal, choice,uniform
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os

# Specify parameter sampler
#ps = RandomParameterSampling ( { "learning_rate ":normal(10,3), "keep_probability": uniform(0.05,0.1), "batch_size": choice(16,32,64,128)} )
#ps = RandomParameterSampling ( {"--max_iter":choice(30,50,100),"--C":choice(0.5,1,1.5)} )
#ps = RandomParameterSampling ( {"--max_iter":choice(30,150,300),"--C":choice(0.5,1,1.5)} )
ps = RandomParameterSampling ( {"--max_iter":choice(100,500,1000),"--C":choice(0.5,1,1.5,2.0,2.5)} )

primary_metric_name = "AUC_weighted"
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE

# Specify a Policy
policy = BanditPolicy(slack_factor=0.30,evaluation_interval=1,delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training") 


# Create a SKLearn estimator for use with train.py
est = SKLearn (source_directory = "./", 
               entry_script = 'train.py',
               compute_target = MYcompute_cluster)


# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig ( 
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name=primary_metric_name,
    primary_metric_goal=primary_metric_goal,
    max_total_runs=100,
    max_concurrent_runs=5)

In [5]:

### YOUR CODE HERE ###
from azureml.core.experiment import Experiment
#!pip3 install azureml.widgets in dos prompt
from azureml.widgets import RunDetails
from azureml.core.run import Run

#exp = Experiment (ws,ws.name)
run = experiment.start_logging()
expRun = experiment.submit(hyperdrive_config,show_output=True)


RunDetails(expRun).show()
#print(run.get_portal_url())
#expRun.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Run Details

Best Metric obtained on AUC_weighted obtained in the 76,77 & 81 runs where regularization strenght parameter value was 1.5. The PrimaryMetricGoal defines whether the minimum or maximum of the primary metric is used. Only one of the runs is returned, even if several of the Runs launched by this HyperDrive run reached the same best metric.


In [15]:
#!pip install azureml-widgets==1.20 
RunDetails(expRun).show()
print(run.get_portal_url())
expRun.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

https://ml.azure.com/experiments/automlpimadiabetesps/runs/098019f5-8985-4722-80fa-ea0ba7e3174e?wsid=/subscriptions/8a086c9c-5530-4791-89af-62c8cdfda3fd/resourcegroups/trn-aue-advanced-analytics-ml/workspaces/trn-aue-aa-ml
RunId: HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b
Web View: https://ml.azure.com/experiments/automlpimadiabetesps/runs/HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b?wsid=/subscriptions/8a086c9c-5530-4791-89af-62c8cdfda3fd/resourcegroups/trn-aue-advanced-analytics-ml/workspaces/trn-aue-aa-ml

Execution Summary
RunId: HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b
Web View: https://ml.azure.com/experiments/automlpimadiabetesps/runs/HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b?wsid=/subscriptions/8a086c9c-5530-4791-89af-62c8cdfda3fd/resourcegroups/trn-aue-advanced-analytics-ml/workspaces/trn-aue-aa-ml



{'runId': 'HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b',
 'target': 'pimadiabetes',
 'status': 'Completed',
 'startTimeUtc': '2021-02-02T20:03:52.544793Z',
 'endTimeUtc': '2021-02-02T20:16:49.369968Z',
 'properties': {'primary_metric_config': '{"name": "AUC_weighted", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '895e4960-e7eb-412f-ba6c-972d871f8690',
  'score': '0.71869918699187',
  'best_child_run_id': 'HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://trnaueaaml.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=T31nOcykB9xySZdJrULusU5%2F%2BrIiaCoHTgNIn5O5gHs%3D&st=2021-02-04T11%3A18%3A36Z&se=2021-02-04T19%3A28%3A36Z&sp=r'},
 'submittedBy': 'Augusto Avila'}

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [7]:
import joblib
import json 
# Get your best run and save the model from that run.

if "output" not in os.listdir():
    os.mkdir("./output")   

best_run = expRun.get_best_run_by_primary_metric()
#print (best_run)
best_run_metrics = best_run.get_metrics('AUC_weighted')
parameter_values = best_run.get_details()['runDefinition']['arguments']

print ('Best run id:',best_run.id)
print ('################################')
print ('\n AUC_weighted:',best_run_metrics)
print ('################################')
print ('\n Learning rate:', parameter_values)
print ('################################')

print (best_run.get_tags())



Best run id: HD_e5e25e6a-80f3-4181-b6b2-dcfaaf8a7e5b_0
################################

 AUC_weighted: {'AUC_weighted': 0.71869918699187}
################################

 Learning rate: ['--C', '2.5', '--max_iter', '100']
################################
{'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}'}


## Best Model

Get the best model from the hyperdrive experiments and display all the properties of the model.

In [18]:
# Save the best model

model = best_run.register_model(model_name='ParamSampbestmodel.pkl', model_path='.',
tags={'area': "diabetes", 'type': "Classification"},
description="Best Model using Hyperdrive Parameter Sampling"
)

print ('Model Name',model.name)
print ('Model Version',model.version)
print ('Model Tags',model.tags) 
print ('Model Description', model.description)
model.properties

Model Name ParamSampbestmodel.pkl
Model Version 5
Model Tags {'area': 'diabetes', 'type': 'Classification'}
Model Description Best Model using Hyperdrive Parameter Sampling


{}