In [1]:
import azureml.automl.runtime
azureml.automl.runtime.__version__

'1.20.0'

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
csv_path='https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
ds = TabularDatasetFactory.from_delimited_files(csv_path)
### YOUR CODE HERE ###
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

# Compering HyperDrive and  AutoML in Azure ML 

In [2]:
from azureml.core import Workspace, Experiment

ws = Workspace.get(name="quick-starts-ws-135784",
               subscription_id='f5091c60-1c3c-430f-8d81-d802f6bf2414',
               resource_group='aml-quickstarts-135784')

exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-135784
Azure region: southcentralus
Subscription id: f5091c60-1c3c-430f-8d81-d802f6bf2414
Resource group: aml-quickstarts-135784


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
cpu_cluster_name = "cpu-cluster"
vm_size='STANDARD_D2_V2'

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except:
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size, max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

# Can poll for a minimum number of nodes and for a specific timeout. 
# If no min node count is provided it uses the scale settings for the cluster.
compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


##  Part1. Hyperparameter Tuning Using HyperDrive

In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
import os

# Specify parameter sampler
ps = RandomParameterSampling(
    parameter_space ={
        '--C' : choice(0.001,0.01,0.1,1,10,20,50,100,200,500,1000),
        '--max_iter': choice(50,100,200,300)}
)

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = "./",
            compute_target=compute_target,
            entry_script="train.py")

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(hyperparameter_sampling=ps, 
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=policy,
                                     estimator=est,
                                     max_total_runs=16,
                                     max_concurrent_runs=4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [5]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
# Start the HyperDrive run
hyperdrive_run = exp.submit(hyperdrive_config)

# Monitor HyperDrive runs You can monitor the progress of the runs with the following Jupyter widget
RunDetails(hyperdrive_run).show()

# hyperdrive_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [10]:
# Get your best run and save the model from that run.
from azureml.core.model import Model

### YOUR CODE HERE ###
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()['Accuracy']
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('Accuracy:', best_run_metrics)
print('C:',parameter_values[1])
print('Max Iter:',parameter_values[3])

best_run.download_file("/outputs/model.joblib", "Hyperdrive.joblib")
#model = best_run.register_model(model_name = 'Logistic_reg_hd_bankmarketing', model_path = 'outputs/model.joblib')


Best Run Id:  HD_c9578e91-0459-4099-9d1e-d58a85610196_13
Accuracy: 0.9142995872784656
C: 0.001
Max Iter: 50


## Part2. Hyperparammeter Tuning and Model Training using AutoML

In [8]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data = ds,
    label_column_name = "y",
    n_cross_validations=5,
    compute_target=compute_target)

In [9]:
# Submit your automl run

exp = Experiment(workspace=ws, name="AutoML-project")
automl_run = exp.submit(config = automl_config)

# Reference: lesson 6.3: running a history widget to show the progress
RunDetails(automl_run).show()

Running on remote.


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [11]:
# Retrieve and save your best automl model.
best_run, fitted_model = automl_run.get_output()

In [12]:
# get_metrics()
# Returns the metrics
print("Best run metrics :",best_run.get_metrics())
# get_details()
# Returns a dictionary with the details for the run
print("Best run details :",best_run.get_details())

Best run metrics : {'recall_score_weighted': 0.9172382397572079, 'f1_score_macro': 0.7802201270776541, 'balanced_accuracy': 0.7655628182824425, 'average_precision_score_macro': 0.8264759358475029, 'average_precision_score_micro': 0.9809321012568853, 'recall_score_macro': 0.7655628182824425, 'matthews_correlation': 0.5623913918639267, 'f1_score_micro': 0.9172382397572078, 'precision_score_macro': 0.7978994763201616, 'f1_score_weighted': 0.9148617812601323, 'norm_macro_recall': 0.5311256365648853, 'AUC_micro': 0.980298060472367, 'precision_score_micro': 0.9172382397572079, 'average_precision_score_weighted': 0.9554724126232618, 'log_loss': 0.23604806561655792, 'precision_score_weighted': 0.9132659981810324, 'AUC_weighted': 0.9460275224275648, 'accuracy': 0.9172382397572079, 'recall_score_micro': 0.9172382397572079, 'AUC_macro': 0.9460275224275648, 'weighted_accuracy': 0.954935890073607, 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_48b82edb-e1c7-4573-8d1e-f8c8535cd0e6_1

In [26]:
fitted_model._final_estimator

PreFittedSoftVotingClassifier(classification_labels=None,
                              estimators=[('1',
                                           Pipeline(memory=None,
                                                    steps=[('maxabsscaler',
                                                            MaxAbsScaler(copy=True)),
                                                           ('xgboostclassifier',
                                                            XGBoostClassifier(base_score=0.5,
                                                                              booster='gbtree',
                                                                              colsample_bylevel=1,
                                                                              colsample_bynode=1,
                                                                              colsample_bytree=1,
                                                                              gamma=0,
              

In [30]:
pprint(fitted_model)

PipelineWithYTransformations(Pipeline={'memory': None,
                                       'steps': [('datatransformer',
                                                  DataTransformer(enable_dnn=None,
                                                                  enable_feature_sweeping=None,
                                                                  feature_sweeping_config=None,
                                                                  feature_sweeping_timeout=None,
                                                                  featurization_config=None,
                                                                  force_text_dnn=None,
                                                                  is_cross_validation=None,
                                                                  is_onnx_compatible=None,
                                                                  logger=None,
                                                              

In [43]:
from pprint import pprint
def print_model(model, prefix=""):
    """
    This method will help us to get best Hyperparameters of model 
    """
    pprint(model)
    
    
for step in fitted_model.steps:
    print(step[0])
    if hasattr(step[1], 'estimators') and hasattr(step[1], 'weights'):
        pprint({'estimators': list(e[0] for e in step[1].estimators), 'weights': step[1].weights})
        print()
        for estimator in step[1].estimators:
            print_model(estimator[1], estimator[0] + ' - ')
    else:
        pprint(step[1].get_params())
        print()
        print_model(fitted_model)

datatransformer
{'enable_dnn': None,
 'enable_feature_sweeping': None,
 'feature_sweeping_config': None,
 'feature_sweeping_timeout': None,
 'featurization_config': None,
 'force_text_dnn': None,
 'is_cross_validation': None,
 'is_onnx_compatible': None,
 'logger': None,
 'observer': None,
 'task': None,
 'working_dir': None}

PipelineWithYTransformations(Pipeline={'memory': None,
                                       'steps': [('datatransformer',
                                                  DataTransformer(enable_dnn=None,
                                                                  enable_feature_sweeping=None,
                                                                  feature_sweeping_config=None,
                                                                  feature_sweeping_timeout=None,
                                                                  featurization_config=None,
                                                                  force_text_dnn=N

In [40]:
best_run.get_tags()

{'_aml_system_azureml.automlComponent': 'AutoML',
 '_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":1}',
 'ensembled_iterations': '[1, 0, 14, 11, 6, 9, 5]',
 'ensembled_algorithms': "['XGBoostClassifier', 'LightGBM', 'XGBoostClassifier', 'XGBoostClassifier', 'XGBoostClassifier', 'LogisticRegression', 'RandomForest']",
 'ensemble_weights': '[0.15384615384615385, 0.15384615384615385, 0.15384615384615385, 0.07692307692307693, 0.15384615384615385, 0.07692307692307693, 0.23076923076923078]',
 'best_individual_pipeline_score': '0.9149317147192717',
 'best_individual_iteration': '1',
 '_aml_system_automl_is_child_run_end_telemetry_event_logged': 'True',
 'model_explain_run_id': 'AutoML_48b82edb-e1c7-4573-8d1e-f8c8535cd0e6_ModelExplain',
 'model_explanation': 'True'}

In [41]:
#Save the model
best_run.register_model(model_name = 'automl_model.pkl', model_path = './outputs/')

Model(workspace=Workspace.create(name='quick-starts-ws-135784', subscription_id='f5091c60-1c3c-430f-8d81-d802f6bf2414', resource_group='aml-quickstarts-135784'), name=automl_model.pkl, id=automl_model.pkl:5, version=5, tags={}, properties={})

In [42]:
# delete compute cluster
compute_target.delete()

ComputeTargetException: ComputeTargetException:
	Message: Received bad response from Resource Provider:
Response Code: 500
Headers: {'Cache-Control': 'no-cache', 'Pragma': 'no-cache', 'Content-Length': '1458', 'Content-Type': 'application/json; charset=utf-8', 'Expires': '-1', 'x-ms-failure-cause': 'service', 'Request-Context': 'appId=cid-v1:2d2e8e63-272e-4b3c-8598-4ee570a0e70d', 'x-ms-response-type': 'error', 'x-ms-client-request-id': 'cc037684-56d4-45d3-9111-a9ee54beeef5', 'x-ms-client-session-id': '3f4c1391-b522-4cd1-993c-073658ac8e79', 'X-Content-Type-Options': 'nosniff', 'x-request-time': '0.044', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', 'x-ms-ratelimit-remaining-subscription-deletes': '14999', 'x-ms-request-id': '3877dbc5-04fa-407d-a904-66c3219cbb26', 'x-ms-correlation-request-id': '3877dbc5-04fa-407d-a904-66c3219cbb26', 'x-ms-routing-request-id': 'SOUTHCENTRALUS:20210123T164716Z:3877dbc5-04fa-407d-a904-66c3219cbb26', 'Date': 'Sat, 23 Jan 2021 16:47:15 GMT', 'Connection': 'close'}
Content: b'{\n  "error": {\n    "code": "ServiceError",\n    "severity": null,\n    "message": "Received 404 from a service request",\n    "messageFormat": null,\n    "messageParameters": null,\n    "referenceCode": null,\n    "detailsUri": null,\n    "target": "GET https://southcentralus.api.azureml.ms/mlc/subscriptions/f5091c60-1c3c-430f-8d81-d802f6bf2414/resourceGroups/aml-quickstarts-135784/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-135784/computes/cpu-cluster?workspaceId=5dd7bf2e-819b-4686-b6e9-ee7f6000d88d&api-version=2019-11-01",\n    "details": [\n      {\n        "code": "NotFound",\n        "severity": null,\n        "message": "{\\"error\\":{\\"code\\":\\"ResourceNotFound\\",\\"message\\":\\"The resource was not found.\\",\\"innererror\\":{\\"clientRequestId\\":\\"cc037684-56d4-45d3-9111-a9ee54beeef5\\",\\"serviceRequestId\\":\\"|00-044bf478f3a6184681ecb1f4f8abd29f-74b5c905e60d6c48-00.ccdceea0_\\"}}}",\n        "messageFormat": null,\n        "messageParameters": {},\n        "referenceCode": null,\n        "detailsUri": null,\n        "target": null,\n        "details": [],\n        "innerError": null,\n        "debugInfo": null\n      }\n    ],\n    "innerError": null,\n    "debugInfo": null\n  },\n  "correlation": {\n    "operation": "044bf478f3a6184681ecb1f4f8abd29f",\n    "request": "8c220f32e2e45248"\n  },\n  "environment": "southcentralus",\n  "location": "southcentralus",\n  "time": "2021-01-23T16:47:16.0091921+00:00",\n  "componentName": "account-rp"\n}'
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Received bad response from Resource Provider:\nResponse Code: 500\nHeaders: {'Cache-Control': 'no-cache', 'Pragma': 'no-cache', 'Content-Length': '1458', 'Content-Type': 'application/json; charset=utf-8', 'Expires': '-1', 'x-ms-failure-cause': 'service', 'Request-Context': 'appId=cid-v1:2d2e8e63-272e-4b3c-8598-4ee570a0e70d', 'x-ms-response-type': 'error', 'x-ms-client-request-id': 'cc037684-56d4-45d3-9111-a9ee54beeef5', 'x-ms-client-session-id': '3f4c1391-b522-4cd1-993c-073658ac8e79', 'X-Content-Type-Options': 'nosniff', 'x-request-time': '0.044', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', 'x-ms-ratelimit-remaining-subscription-deletes': '14999', 'x-ms-request-id': '3877dbc5-04fa-407d-a904-66c3219cbb26', 'x-ms-correlation-request-id': '3877dbc5-04fa-407d-a904-66c3219cbb26', 'x-ms-routing-request-id': 'SOUTHCENTRALUS:20210123T164716Z:3877dbc5-04fa-407d-a904-66c3219cbb26', 'Date': 'Sat, 23 Jan 2021 16:47:15 GMT', 'Connection': 'close'}\nContent: b'{\\n  \"error\": {\\n    \"code\": \"ServiceError\",\\n    \"severity\": null,\\n    \"message\": \"Received 404 from a service request\",\\n    \"messageFormat\": null,\\n    \"messageParameters\": null,\\n    \"referenceCode\": null,\\n    \"detailsUri\": null,\\n    \"target\": \"GET https://southcentralus.api.azureml.ms/mlc/subscriptions/f5091c60-1c3c-430f-8d81-d802f6bf2414/resourceGroups/aml-quickstarts-135784/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-135784/computes/cpu-cluster?workspaceId=5dd7bf2e-819b-4686-b6e9-ee7f6000d88d&api-version=2019-11-01\",\\n    \"details\": [\\n      {\\n        \"code\": \"NotFound\",\\n        \"severity\": null,\\n        \"message\": \"{\\\\\"error\\\\\":{\\\\\"code\\\\\":\\\\\"ResourceNotFound\\\\\",\\\\\"message\\\\\":\\\\\"The resource was not found.\\\\\",\\\\\"innererror\\\\\":{\\\\\"clientRequestId\\\\\":\\\\\"cc037684-56d4-45d3-9111-a9ee54beeef5\\\\\",\\\\\"serviceRequestId\\\\\":\\\\\"|00-044bf478f3a6184681ecb1f4f8abd29f-74b5c905e60d6c48-00.ccdceea0_\\\\\"}}}\",\\n        \"messageFormat\": null,\\n        \"messageParameters\": {},\\n        \"referenceCode\": null,\\n        \"detailsUri\": null,\\n        \"target\": null,\\n        \"details\": [],\\n        \"innerError\": null,\\n        \"debugInfo\": null\\n      }\\n    ],\\n    \"innerError\": null,\\n    \"debugInfo\": null\\n  },\\n  \"correlation\": {\\n    \"operation\": \"044bf478f3a6184681ecb1f4f8abd29f\",\\n    \"request\": \"8c220f32e2e45248\"\\n  },\\n  \"environment\": \"southcentralus\",\\n  \"location\": \"southcentralus\",\\n  \"time\": \"2021-01-23T16:47:16.0091921+00:00\",\\n  \"componentName\": \"account-rp\"\\n}'"
    }
}