# Hyperparameter Tuning using HyperDrive

Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

#!pip install azureml-sdk==1.19.0

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)


SDK version: 1.20.0


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

MYcompute_cluster = "pimadiabetes"

# Verify that cluster does not exist already
try:
    aml_compute = ComputeTarget(workspace=ws, name=MYcompute_cluster)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    aml_compute = ComputeTarget.create(ws, MYcompute_cluster, compute_config)

aml_compute.wait_for_completion(show_output=True)

trn-aue-aa-ml
trn-aue-advanced-analytics-ml
australiaeast
8a086c9c-5530-4791-89af-62c8cdfda3fd
Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

Get data. Reference to datahub.io dataset on pima women diabetes. https://datahub.io/machine-learning/diabetes/r/diabetes.csv

In [3]:
# choose a name for experiment
experiment_name = 'automlpimadiabetesGS'
project_folder = './Augusto_Avila/capstone'

experiment=Experiment(ws, experiment_name)
ds = Dataset.get_by_name(ws, name='pimadiabetes')

## Hyperdrive Configuration - Grid Sampling

Using a classification model and a Grid sampling hyperdrive configuration. Bandit policy chosed for termination policy with a slack factor of 30% based
on the results of the previous automl run.

In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive import GridParameterSampling
from azureml.train.hyperdrive import normal, choice,uniform
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
import os


# Specify parameter sampler
#ps = RandomParameterSampling ( { "learning_rate ":normal(10,3), "keep_probability": uniform(0.05,0.1), "batch_size": choice(16,32,64,128)} )
#ps = GridParameterSampling ( {"--max_iter":choice(30,50,100),"--C":choice(0.5,1,1.5)} )
#ps = GridParameterSampling ( {"--max_iter":choice(30,150,100),"--C":choice(0.5,1,1.5)} )
ps = GridParameterSampling ( {"--max_iter":choice(100,500,1000),"--C":choice(0.5,1,1.5,2.0,2.5)} )

primary_metric_name = "AUC_weighted"
primary_metric_goal = PrimaryMetricGoal.MAXIMIZE

# Specify a Policy
policy = BanditPolicy(slack_factor=0.30,evaluation_interval=1,delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training") 


# Create a SKLearn estimator for use with train.py
est = SKLearn (source_directory = "./", 
               entry_script = 'train.py',
               compute_target = MYcompute_cluster)


# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig ( 
    estimator=est,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name=primary_metric_name,
    primary_metric_goal=primary_metric_goal,
    max_total_runs=100,
    max_concurrent_runs=5)

In [6]:

### YOUR CODE HERE ###
from azureml.core.experiment import Experiment
#!pip3 install azureml.widgets in dos prompt
from azureml.widgets import RunDetails
from azureml.core.run import Run

#exp = Experiment (ws,ws.name)
run = experiment.start_logging()
expRun = experiment.submit(hyperdrive_config,show_output=True)


RunDetails(expRun).show()
print(run.get_portal_url())
expRun.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

https://ml.azure.com/experiments/automlpimadiabetesGS/runs/f72a78a9-4534-4f8b-bd59-5275f1f3b904?wsid=/subscriptions/8a086c9c-5530-4791-89af-62c8cdfda3fd/resourcegroups/trn-aue-advanced-analytics-ml/workspaces/trn-aue-aa-ml
RunId: HD_92dd3a63-d9c3-4064-a24d-e03761501977
Web View: https://ml.azure.com/experiments/automlpimadiabetesGS/runs/HD_92dd3a63-d9c3-4064-a24d-e03761501977?wsid=/subscriptions/8a086c9c-5530-4791-89af-62c8cdfda3fd/resourcegroups/trn-aue-advanced-analytics-ml/workspaces/trn-aue-aa-ml

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-04T11:36:51.483861][API][INFO]Experiment created<END>\n""<START>[2021-02-04T11:36:51.967044][GENERATOR][INFO]Trying to sample '5' jobs from the hyperparameter space<END>\n""<START>[2021-02-04T11:36:52.150743][GENERATOR][INFO]Successfully sampled '5' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-02-04T11:36:52.4755472Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as i

{'runId': 'HD_92dd3a63-d9c3-4064-a24d-e03761501977',
 'target': 'pimadiabetes',
 'status': 'Completed',
 'startTimeUtc': '2021-02-04T11:36:51.224829Z',
 'endTimeUtc': '2021-02-04T11:49:09.376564Z',
 'properties': {'primary_metric_config': '{"name": "AUC_weighted", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'df9850e6-9a6f-4074-a0f2-5ffde2ee1e75',
  'score': '0.71869918699187',
  'best_child_run_id': 'HD_92dd3a63-d9c3-4064-a24d-e03761501977_12',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://trnaueaaml.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_92dd3a63-d9c3-4064-a24d-e03761501977/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=D2WdG90vJDcpGf2GlHQvaCuSS0GIB7UIiAUO8WUfH2E%3D&st=2021-02-04T11%3A39%3A36Z&se=2021-02-04T19%3A49%3A36Z&sp=r'},
 'submittedBy': 'Augusto Avila'}

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [8]:
import joblib
import json 
# Get your best run and save the model from that run.

if "output" not in os.listdir():
    os.mkdir("./output")   

best_run = expRun.get_best_run_by_primary_metric()
#print (best_run)
best_run_metrics = best_run.get_metrics('AUC_weighted')
parameter_values = best_run.get_details()['runDefinition']['arguments']

print ('Best run id:',best_run.id)
print ('################################')
print ('\n AUC_weighted:',best_run_metrics)
print ('################################')
print ('\n Learning rate:', parameter_values)
print ('################################')

print (best_run.get_tags())



Best run id: HD_32d38e02-bbeb-4725-9506-304cb2847450_6
################################

 AUC_weighted: {'AUC_weighted': 0.7138211382113822}
################################

 Learning rate: ['--max_iter', '30', '--C', '1.5']
################################
{'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":5,"CurrentNodeCount":5}'}
