In [1]:
# pip install -r /anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/validated_linux_requirements.txt

## Azure ML

In [1]:
from azureml.core import Workspace, Experiment

#ws = Workspace.get(name="udacity-project")
ws = Workspace.from_config() 

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

exp = Experiment(workspace=ws, name="HD-incidents-project")
run = exp.start_logging()


# to get list of al experiments
# list_experiments = Experiment.list(ws)

Workspace name: quick-starts-ws-136346
Azure region: southcentralus
Subscription id: a24a24d5-8d87-4c8a-99b6-91ed2d2df51f
Resource group: aml-quickstarts-136346


## Dataset
### Overview

   * The incident management log data is retrieved from UCI Machine learnig data set. [Incident Data)[https://archive.ics.uci.edu/ml/datasets/Incident+management+process+enriched+event+log]. The data is sourced from a Servicenow platform for IT service management.

   * This provides details of various incidents recorded over period of time . I have taken up this project to predict ETA(expected time of accomplishment) to understand time to resolve each incident. This helps IT department to provide ETA for customers based on time taken to resolve similar issues historically. This will also help IT department to understand if any instance will go beyond expected SLA.


In [6]:
import pandas as pd
import numpy as np
import os
import zipfile
from zipfile import ZipFile
from zipfile import BadZipFile
from glob import iglob
from io import BytesIO
from urllib.request import urlopen

import dateutil.parser
from sklearn.model_selection import train_test_split
from source import train as src
import source.train as src


## AutoML Configuration

In [9]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


# Choose a name for your CPU cluster
cpu_cluster_name = "cpu-cluster"

# Verify that the cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS3_V2',
                                                           idle_seconds_before_scaledown=2400,
                                                           min_nodes=0,
                                                           max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)


Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## HyperDrive Experiment

In [12]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive import *
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.core  import  script_run_config
from azureml.core import Environment
from azureml.core  import  ScriptRunConfig, Experiment
import os
import shutil

# Specify parameter sampler
#ps = ### YOUR CODE HERE ###
ps = RandomParameterSampling( {
    'max_depth': choice(2,5),
    'learning_rate': choice(1,10)
            }
    )
# Specify a Policy
#policy = ### YOUR CODE HERE ###
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)
if "source" not in os.listdir():
    os.mkdir("./source")

# shutil.copy('train.py', './source')
# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = './source',
              entry_script = 'train-1.py',
              compute_target = cpu_cluster,
              user_managed = True)


sklearn_env =Environment.from_conda_specification(name = 'sklearn_env', file_path = './conda_env.yml') 

src = ScriptRunConfig(source_directory='./source',
                      compute_target=cpu_cluster,
                      script='train-1.py',
                      arguments=['--C', 1.0, '--max_iter', 100],
                      environment=sklearn_env)

             

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.

'''
hyperdrive_config = HyperDriveConfig(
                             hyperparameter_sampling=ps,
                             policy=policy,
                             primary_metric_name="Accuracy",
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs=18,
                             max_concurrent_runs=3,
                            estimator = est)
'''
hyperdrive_config = HyperDriveConfig(
        primary_metric_name='Rsquare',
        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=10,
        max_concurrent_runs=3,
        hyperparameter_sampling=ps,
        policy=policy,
        estimator = est
        )


'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


## Submit Experiment

In [13]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(hyperdrive_config)




# Monitor Hyperdrive

In [14]:
from azureml.widgets import RunDetails

RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec
Web View: https://ml.azure.com/experiments/HD-incidents-project/runs/HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136346/workspaces/quick-starts-ws-136346

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-28T07:54:39.336854][API][INFO]Experiment created<END>\n""<START>[2021-01-28T07:54:39.889148][GENERATOR][INFO]Trying to sample '3' jobs from the hyperparameter space<END>\n""<START>[2021-01-28T07:54:40.059516][GENERATOR][INFO]Successfully sampled '3' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-28T07:54:41.5791385Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec
Web View: https://ml.azure.com/experiments/HD-incidents-project/runs/HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec?wsid=/subscript

{'runId': 'HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-28T07:54:39.080728Z',
 'endTimeUtc': '2021-01-28T08:02:17.812787Z',
 'properties': {'primary_metric_config': '{"name": "Rsquare", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'a67fabc8-aa46-4917-a5f4-0de834e3f4cb',
  'score': '0.4521358989023022',
  'best_child_run_id': 'HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg136346.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=Fnu3BmOsiMWuP9AIeBeiJLwaFgvbJrN4si7HaHHlJS0%3D&st=2021-01-28T07%3A52%3A23Z&se=2021-01-28T16%3A02%3A23Z&sp=r'},
 'submittedBy': 'ODL_User 136346'}

In [15]:
assert(hyperdrive_run.get_status() == "Completed")

# Best Model

In [16]:

hd_best_run = hyperdrive_run.get_best_run_by_primary_metric()
hd_best_run.get_details()
print(hd_best_run)
print(hd_best_run.get_metrics())


hd_best_run.wait_for_completion(show_output=True)


Run(Experiment: HD-incidents-project,
Id: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3,
Type: azureml.scriptrun,
Status: Completed)
{'Regularization Strength:': 2.0, 'Max iterations:': 1, 'Rsquare': 0.4521358989023022, 'rms': 429.142176795971, 'Max depth:': 2.0, 'Learning rate:': 1}
RunId: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3
Web View: https://ml.azure.com/experiments/HD-incidents-project/runs/HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136346/workspaces/quick-starts-ws-136346

Execution Summary
RunId: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3
Web View: https://ml.azure.com/experiments/HD-incidents-project/runs/HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136346/workspaces/quick-starts-ws-136346



{'runId': 'HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-28T08:01:03.590504Z',
 'endTimeUtc': '2021-01-28T08:01:43.094382Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'a67fabc8-aa46-4917-a5f4-0de834e3f4cb',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train-1.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--learning_rate', '1', '--max_depth', '2'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'cpu-cluster',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'jobName': None,
  'maxRunDurationSeconds': None,
  'nodeCount': 1,
  'priority': None,
  'credentialPassthrough': False,
  'environment': {'name': 'Experiment HD-incidents-project Environment',


In [18]:
from azureml.core import get_run
hd_run_cpu_id = 'HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3'
# run_cpu_id = 'AutoML_84449a21-af00-48a4-81da-d6a40683d1e2' #get from portal
hd_best_run_id = get_run(exp, hd_run_cpu_id)

In [19]:
print(hd_best_run_id)

Run(Experiment: HD-incidents-project,
Id: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3,
Type: azureml.scriptrun,
Status: Completed)


## HyperDrive Model 

In [20]:
#define model
hd_model = hd_best_run.register_model(model_name='HD_model_project', model_path='./')

print(" Registered HD Nodel =", hd_model)


 Registered HD Nodel = Model(workspace=Workspace.create(name='quick-starts-ws-136346', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-136346'), name=HD_model_project, id=HD_model_project:1, version=1, tags={}, properties={})


In [21]:
print(hd_model.description)
print(hd_best_run)

None
Run(Experiment: HD-incidents-project,
Id: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3,
Type: azureml.scriptrun,
Status: Completed)


## Hyperdrive Metrcis


In [22]:
import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###
hd_best_run = hyperdrive_run.get_best_run_by_primary_metric()
hd_best_run_metrics = hd_best_run.get_metrics()

parameter_values = hd_best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', hd_best_run.id)
print('\n R2_Score:', hd_best_run_metrics['Rsquare'])
#print('\n learning rate:',parameter_values[3])
#print('\n keep probability:',parameter_values[5])
#print('\n batch size:',parameter_values[7])

hyperdrive_run.wait_for_completion(show_output=True)



Best Run Id:  HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3

 R2_Score: 0.4521358989023022
RunId: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec
Web View: https://ml.azure.com/experiments/HD-incidents-project/runs/HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136346/workspaces/quick-starts-ws-136346

Execution Summary
RunId: HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec
Web View: https://ml.azure.com/experiments/HD-incidents-project/runs/HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-136346/workspaces/quick-starts-ws-136346



{'runId': 'HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-28T07:54:39.080728Z',
 'endTimeUtc': '2021-01-28T08:02:17.812787Z',
 'properties': {'primary_metric_config': '{"name": "Rsquare", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'a67fabc8-aa46-4917-a5f4-0de834e3f4cb',
  'score': '0.4521358989023022',
  'best_child_run_id': 'HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg136346.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_ecfc9076-4ec0-4677-8037-1a6fa97efbec/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=Fnu3BmOsiMWuP9AIeBeiJLwaFgvbJrN4si7HaHHlJS0%3D&st=2021-01-28T07%3A52%3A23Z&se=2021-01-28T16%3A02%3A23Z&sp=r'},
 'submittedBy': 'ODL_User 136346'}

## Register the HD Model

In [25]:
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

model = Model.register(workspace=ws,
                       model_name='HD_model_project',                # Name of the registered model in your workspace.
                       model_path='./',  # Local file to upload and register as a model.
                       model_framework=Model.Framework.SCIKITLEARN,      
                       description='linear regression model to predict days to close.')

print('Name:', model.name)
print('Version:', model.version)

Registering model HD_model_project
Name: HD_model_project
Version: 2


In [None]:
cpu_cluster.delete()