In [2]:
from azureml.core import Workspace, Experiment, Environment

# Load the workspace from the local configuration, then look up the experiment
# and the named environment that the training runs will use.
ws = Workspace.from_config()
exp = Experiment(name="udacity-project", workspace=ws)
my_env = Environment.get(name="AzureML-Scikit-learn-0.20.3", workspace=ws)

# Show the key workspace properties, one per line.
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

# Start an interactive logging run on the experiment.
# NOTE(review): `run` is not used or completed in the cells visible here --
# confirm whether it is still needed or should be closed with run.complete().
run = exp.start_logging()

Workspace name: quick-starts-ws-137806
Azure region: southcentralus
Subscription id: 3e42d11f-d64d-4173-af9b-12ecaa1030b3
Resource group: aml-quickstarts-137806


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Compute cluster used by the experiment runs.
# Assignment constraints: vm_size "Standard_D2_V2" and max_nodes no greater than 4.

# Name of the CPU cluster to reuse or create.
cpu_cluster_name = "cpu-cluster-4"

try:
    # Looking the cluster up by name raises ComputeTargetException when the
    # lookup fails, in which case a new cluster is provisioned below.
    compute_target = ComputeTarget(name=cpu_cluster_name, workspace=ws)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=cpu_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports its provisioning result.
compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.core.script_run_config import ScriptRunConfig

import os

# Hyperparameter search space, sampled at random. The keys are the
# command-line flags that end up in each child run's argument list.
ps = RandomParameterSampling({
    # max_iter: maximum number of iterations, picked from four fixed values
    '--max_iter': choice(10, 50, 100, 200),
    # C: inverse of regularisation strength, drawn with uniform(0.01, 0.02)
    '--C': uniform(0.01, 0.02),
})

# Early termination: Bandit policy with a slack factor of 0.1 and an
# evaluation interval of 2.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Make sure the ./training directory exists. makedirs(exist_ok=True) replaces
# the manual "list the directory, then mkdir" check: it is a no-op when the
# directory is already there and avoids the check-then-create race.
os.makedirs("./training", exist_ok=True)

# Script run configuration for train.py. The current directory is used as the
# source directory, and the run targets the compute cluster and environment
# prepared in the earlier cells.
# NOTE: despite the name `est`, this is a ScriptRunConfig, not an estimator.
est = ScriptRunConfig(
    source_directory='./',
    script='train.py',
    compute_target=compute_target,
    environment=my_env,
)

# The remote compute target is supplied through the `compute_target` argument
# of ScriptRunConfig above, so no separate run_config.target assignment is made.

# HyperDrive sweep: maximise the "Accuracy" metric over 4 runs in total,
# with up to 4 of them running concurrently.
hyperdrive_config = HyperDriveConfig(
    run_config=est,
    hyperparameter_sampling=ps,
    policy=early_termination_policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    max_concurrent_runs=4,
)

In [5]:
# Submit the HyperDrive configuration to the experiment, show the run details
# widget, and wait for the run to complete while printing its output.
from azureml.widgets import RunDetails

# Start the HyperDrive run.
hyperdrive_run = exp.submit(config=hyperdrive_config)

RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_9f366745-af9b-410c-8cfb-a4d039a1f53a
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_9f366745-af9b-410c-8cfb-a4d039a1f53a?wsid=/subscriptions/3e42d11f-d64d-4173-af9b-12ecaa1030b3/resourcegroups/aml-quickstarts-137806/workspaces/quick-starts-ws-137806

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-07T10:42:38.936003][API][INFO]Experiment created<END>\n""<START>[2021-02-07T10:42:39.667035][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n"<START>[2021-02-07T10:42:39.8556360Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2021-02-07T10:42:39.967182][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_9f366745-af9b-410c-8cfb-a4d039a1f53a
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_9f366745-af9b-410c-8cfb-a4d039a1f53a?wsid=/subscriptions/3e42d

{'runId': 'HD_9f366745-af9b-410c-8cfb-a4d039a1f53a',
 'target': 'cpu-cluster-4',
 'status': 'Completed',
 'startTimeUtc': '2021-02-07T10:42:38.048305Z',
 'endTimeUtc': '2021-02-07T10:50:23.110443Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'e9827526-fed0-46dd-8e85-634249097693',
  'score': '0.9121396054628225',
  'best_child_run_id': 'HD_9f366745-af9b-410c-8cfb-a4d039a1f53a_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg137806.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_9f366745-af9b-410c-8cfb-a4d039a1f53a/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=yuMtD1cKvup9f2lG6TIdGABk2Z0qq4xmjGRORSvCWj0%3D&st=2021-02-07T10%3A40%3A40Z&se=2021-02-07T18%3A50%3A40Z&sp=r'},
 'submittedBy': 'ODL_User 137806'}

In [6]:
import joblib
# Get the best run of the HyperDrive sweep, report its metric and
# hyperparameters, and register the model from that run.


def _argument_value(arguments, flag):
    """Return the value that directly follows ``flag`` in a flat argument list.

    Looking the value up by flag name keeps the report correct regardless of
    the order in which the flags appear in the run's argument list.

    Args:
        arguments: Flat list such as ``['--C', '0.01', '--max_iter', '50']``.
        flag: The flag whose value is wanted, e.g. ``'--C'``.

    Returns:
        The list element immediately after ``flag``.

    Raises:
        ValueError: If ``flag`` is not present in ``arguments``.
        IndexError: If ``flag`` is the last element and has no value.
    """
    return arguments[arguments.index(flag) + 1]


best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "HyperDrive returned no best run; no child run reported the primary metric."
    )

best_run_metrics = best_run.get_metrics()
# Fetch the run details once and reuse them; each get_details() call is a
# separate request to the service.
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run)
print(parameter_values)
print('\n Accuracy: ', best_run_metrics['Accuracy'])
print('\n Parameter C - Inverse Regularisation Parameter: ', _argument_value(parameter_values, '--C'))
print('\n Maximum Iterations: ', _argument_value(parameter_values, '--max_iter'))

# Register the model file stored under the run's outputs folder.
# NOTE(review): assumes train.py saved the model to this exact path -- confirm.
model = best_run.register_model(model_name='my_hyperdrive_best_model', model_path = 'outputs/hyperdrive_best_model.joblib')

Best Run Id:  Run(Experiment: udacity-project,
Id: HD_9f366745-af9b-410c-8cfb-a4d039a1f53a_2,
Type: azureml.scriptrun,
Status: Completed)
['--C', '0.01724232872880165', '--max_iter', '50']

 Accuracy:  0.9121396054628225

 Parameter C - Inverse Regularisation Parameter:  0.01724232872880165

 Maximum Iterations:  50


In [7]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset with TabularDatasetFactory from the delimited file
# hosted at the URL below.
file_path = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(file_path)

In [8]:
from train import clean_data
import pandas as pd

# Clean the dataset with clean_data (imported from train.py), which returns
# two objects, then join them column-wise into a single DataFrame.
# NOTE(review): presumably x holds the features and y the label -- confirm in train.py.
x, y = clean_data(ds)
df = pd.concat((x, y), axis='columns')

       age          job  marital          education  default housing loan  \
0       57   technician  married        high.school       no      no  yes   
1       55      unknown  married            unknown  unknown     yes   no   
2       33  blue-collar  married           basic.9y       no      no   no   
3       36       admin.  married        high.school       no      no   no   
4       27    housemaid  married        high.school       no     yes   no   
...    ...          ...      ...                ...      ...     ...  ...   
32945   56    housemaid  married           basic.4y       no      no  yes   
32946   37   management  married  university.degree       no      no  yes   
32947   26       admin.   single  university.degree       no      no   no   
32948   31  blue-collar   single           basic.9y       no      no   no   
32949   39    housemaid  married           basic.4y       no      no   no   

         contact month day_of_week  ...  campaign  pdays  previous  \
0    

In [9]:
from azureml.train.automl import AutoMLConfig

# AutoML settings: a classification task on the combined DataFrame, with
# column 'y' as the label, accuracy as the primary metric and 5-fold
# cross-validation.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# Running the experiment for longer requires running this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    task='classification',
    training_data=df,
    label_column_name='y',
    primary_metric='accuracy',
    n_cross_validations=5,
    experiment_timeout_minutes=30,
)

In [10]:
# Submit the AutoML run.

# Experiment that holds the AutoML run.
# NOTE(review): this rebinds `exp`, which an earlier cell bound to the
# "udacity-project" experiment -- re-running earlier cells afterwards would
# submit to "automl" instead. Confirm whether a separate name is preferable.
exp = Experiment(name="automl", workspace=ws)

# Submit the AutoML configuration to that experiment and print progress.
automl_run = exp.submit(config=automl_config, show_output=True)

No run_configuration provided, running on local with default configuration
Running on local machine
Parent Run ID: AutoML_0be3dbd4-14af-4518-9ddf-9ce8c2c14ded

Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/Auto

In [11]:
# Retrieve the best AutoML run together with its fitted model, print both,
# and register the run's outputs folder as a model in the workspace.
best_run, best_model = automl_run.get_output()

print('Best Run Id: ', best_run)
print('Best Model: ', best_model)

model = best_run.register_model(
    model_path='./outputs/',
    model_name='my_autoML_best_model.pkl',
)


Best Run Id:  Run(Experiment: automl,
Id: AutoML_0be3dbd4-14af-4518-9ddf-9ce8c2c14ded_36,
Type: None,
Status: Completed)
Best Model:  Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                               reg_alpha=1.0416666666666667,
                                                                                               reg_lambda=1.5625,
                   

In [12]:
# Release the compute cluster now that all runs have finished.
# The captured output shows the cluster still in the "Deleting" state when
# the message below is printed, so delete() does not wait for completion.
compute_target.delete()
print('compute target is deleted')

compute target is deleted
Current provisioning state of AmlCompute is "Deleting"

