# HyperDrive

### Create a workspace and an experiment

In [1]:
from azureml.core import Workspace, Experiment
# Load the workspace via Workspace.from_config() and echo its identifying fields,
# one per line.
ws = Workspace.from_config()
workspace_summary = (
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)
print(*workspace_summary, sep='\n')

# Runs submitted later in this notebook go through this experiment object.
experiment_name = 'udacity-project'
exp = Experiment(ws, experiment_name)

# NOTE(review): this interactive run is never completed in the visible cells and
# `run` is rebound by `exp.submit(src)` in In [5] -- confirm whether it is needed.
run = exp.start_logging()

Workspace name: quick-starts-ws-132408
Azure region: southcentralus
Subscription id: 9b72f9e6-56c5-4c16-991b-19c652994860
Resource group: aml-quickstarts-132408


### Create a Compute Cluster

In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Create the compute cluster, or reuse it if it already exists.
# Use vm_size = "Standard_D2_V2" in the provisioning configuration.
# max_nodes should be no greater than 4.

# Choose a name for your CPU cluster
from azureml.core.compute_target import ComputeTargetException
cpu_cluster_name = "cpu-cluster-mla"

# Reuse the cluster when one with this name already exists in the workspace;
# ComputeTarget(...) raises ComputeTargetException when it does not, in which
# case a new cluster is provisioned.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

# Wait in both branches: an existing cluster may still be provisioning, and
# later cells submit runs to it straight away.
cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.


### Configure HyperDrive Run

In [3]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.dataset import Dataset
from azureml.data.datapath import DataPath

key = "iris-classification"
description_text = "classification of Iris dataset for capstone project"

# Reuse the dataset registered under `key` when the workspace already has it;
# otherwise build it from the remote CSV and register it under that name.
found = key in ws.datasets.keys()
if found:
    dataset = ws.datasets[key]
else:
    url_path = 'https://raw.githubusercontent.com/dib1979/Temporary/main/IRIS.csv'
    dataset = TabularDatasetFactory.from_delimited_files(path=url_path)
    dataset = dataset.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Materialise the dataset as a pandas frame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os
#import numpy as np

from azureml.core import Environment
# Fetch the environment registered in the workspace under the name 'AzureML-Tutorial'.
sklearn_env = Environment.get(workspace=ws, name='AzureML-Tutorial')

from azureml.core import ScriptRunConfig
# Look the cluster up by the name defined in the compute-cluster cell rather than
# repeating the literal, so the two cannot drift apart.
compute_target = ws.compute_targets[cpu_cluster_name]

# Baseline configuration: run train1.py from the current directory on the cluster
# with a linear kernel and penalty 1.0. The HyperDrive cell reuses `src` as its
# run_config.
src = ScriptRunConfig(source_directory='.',
                      script='train1.py',
                      arguments=['--kernel', 'linear', '--penalty', 1.0],
                      compute_target=compute_target,
                      environment=sklearn_env)




In [5]:
from azureml.widgets import RunDetails

# Submit the baseline script configuration as a single run of the experiment.
run = exp.submit(src)

# Show the monitoring widget, then block until the run finishes while streaming
# its logs; the returned run details become the cell output.
baseline_widget = RunDetails(run)
baseline_widget.show()
run.wait_for_completion(show_output=True)

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

RunId: udacity-project_1609290975_e46d37e4
Web View: https://ml.azure.com/experiments/udacity-project/runs/udacity-project_1609290975_e46d37e4?wsid=/subscriptions/9b72f9e6-56c5-4c16-991b-19c652994860/resourcegroups/aml-quickstarts-132408/workspaces/quick-starts-ws-132408

Streaming azureml-logs/55_azureml-execution-tvmps_6198316abdb4791c2ae5c476ec2eef9f303aa756e9b185408faa818b99644e44_d.txt

2020-12-30T01:19:06Z Starting output-watcher...
2020-12-30T01:19:06Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2020-12-30T01:19:06Z Executing 'Copy ACR Details file' on 10.0.0.5
2020-12-30T01:19:06Z Copy ACR Details file succeeded on 10.0.0.5. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_d97f122bdcaea8212b2eadb5cef56a6d
2c11b7cecaa5: Pulling fs layer
04637fa56252: Pulling fs layer
d6e6af23a0f3: Pulling fs layer
b4a424de92ad: Pulling fs layer
3e5d9ee64909: Pulling fs layer
3a846111ff22: Pulling fs layer
93a5020c6e19: Pullin

{'runId': 'udacity-project_1609290975_e46d37e4',
 'target': 'cpu-cluster-mla',
 'status': 'Completed',
 'startTimeUtc': '2020-12-30T01:19:01.122185Z',
 'endTimeUtc': '2020-12-30T01:22:15.509841Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '25cbfa5e-7154-48d2-aef5-d8e7a86b394b',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train1.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--kernel', 'linear', '--penalty', '1'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'cpu-cluster-mla',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'priority': None,
  'credentialPassthrough': False,
  'environment': {'name': 'AzureML-Tutorial',
   'version': '60',
   'pyt

In [6]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice

# Search space: the keys mirror the flags passed to train1.py in the baseline
# ScriptRunConfig; each one is drawn from a discrete set of candidates.
search_space = {
    "--kernel": choice('linear', 'rbf', 'poly', 'sigmoid'),
    "--penalty": choice(0.5, 1, 1.5),
}
param_sampling = RandomParameterSampling(search_space)

# Early-termination policy handed to the sweep below.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2, delay_evaluation=5)

# Sweep definition: maximise the 'Accuracy' metric over at most 12 runs,
# with up to 4 running concurrently.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,
    max_concurrent_runs=4,
)

In [7]:
# Launch the HyperDrive sweep, show its monitoring widget, and block until it
# finishes while streaming the logs.
hyperdrive_run = exp.submit(hyperdrive_config)
sweep_widget = RunDetails(hyperdrive_run)
sweep_widget.show()
hyperdrive_run.wait_for_completion(show_output=True)

# Stop the notebook here unless the sweep reports the "Completed" status.
final_status = hyperdrive_run.get_status()
assert final_status == "Completed"

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_da6e1617-a95e-4f6d-b8c5-53b475550036
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_da6e1617-a95e-4f6d-b8c5-53b475550036?wsid=/subscriptions/9b72f9e6-56c5-4c16-991b-19c652994860/resourcegroups/aml-quickstarts-132408/workspaces/quick-starts-ws-132408

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-30T01:24:24.276458][API][INFO]Experiment created<END>\n""<START>[2020-12-30T01:24:24.870526][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-12-30T01:24:25.185914][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2020-12-30T01:24:26.3488017Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_da6e1617-a95e-4f6d-b8c5-53b475550036
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_da6e1617-a95e-4f6d-b8c5-53b475550036?wsid=/subscriptions/9b72f

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

### Get your best run and save the model from that run.

In [9]:
import joblib
# Pick the best child run of the sweep by its primary metric and report its
# accuracy. The model itself is registered in the next cell.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Without this guard the failure would surface as an opaque AttributeError
    # on `None.get_metrics()`.
    raise RuntimeError(
        "HyperDrive returned no best run: no child run logged the primary metric."
    )
best_run_metrics = best_run.get_metrics()

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])

Best Run Id:  HD_da6e1617-a95e-4f6d-b8c5-53b475550036_1

 Accuracy: 0.9777777777777777


In [10]:
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

# Resource configuration stored with the registered model.
model_resources = ResourceConfiguration(cpu=1, memory_in_gb=0.5)

# Register the best run's 'outputs/model.joblib' artifact as a scikit-learn model.
# NOTE(review): the framework version is hard-coded to '0.19.1' -- confirm it
# matches the scikit-learn version in the training environment.
model = best_run.register_model(
    model_name='best_fit-hyperdrive-model',
    model_path='outputs/model.joblib',
    model_framework=Model.Framework.SCIKITLEARN,
    model_framework_version='0.19.1',
    resource_configuration=model_resources,
)
print(model.name, model.id, model.version, sep='\t')

best_fit-hyperdrive-model	best_fit-hyperdrive-model:1	1


In [11]:
# Print the registered model's string form (workspace, name, id, version, tags, properties).
print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-132408', subscription_id='9b72f9e6-56c5-4c16-991b-19c652994860', resource_group='aml-quickstarts-132408'), name=best_fit-hyperdrive-model, id=best_fit-hyperdrive-model:1, version=1, tags={}, properties={})


## MODEL DEPLOYMENT
### The best HyperDrive model has an accuracy of 0.97778
### The best AutoML model (VotingEnsemble) has an accuracy of 0.9810
### We therefore proceed with deploying the VotingEnsemble model

In [12]:
# Tear down the compute cluster now that the HyperDrive experiment is finished;
# the cell output reports the provisioning state as "Deleting".
cpu_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

