In [1]:
from azureml.core import Workspace, Experiment

# Connect to the Azure ML workspace by name; every later cell uses `ws`.
ws = Workspace.get(name="quick-starts-ws-131987")

# Echo the workspace's identifying details, one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))



Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code ESD46YX23 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-131987
Azure region: southcentralus
Subscription id: a24a24d5-8d87-4c8a-99b6-91ed2d2df51f
Resource group: aml-quickstarts-131987


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Provision (or reuse) the AmlCompute cluster that runs every experiment in this notebook.
# Lab constraints: vm_size must be "Standard_D2_V2" and max_nodes must be no greater than 4.
cluster_name = "drao-aml-cluster"

try:
    # Reuse the compute target if one with this name already exists in the workspace.
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # The cluster does not exist yet: create it and block until provisioning finishes.
    # Provisioning errors are intentionally NOT caught here. Printing and continuing
    # would leave `training_cluster` undefined and surface later as a confusing
    # NameError in the cells that configure the experiments.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4, min_nodes=1)
    training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    training_cluster.wait_for_completion(show_output=True)

Creating
Succeeded........................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [3]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive import choice
import os

# Create a Python environment for the experiment
sklearn_env = Environment("sklearn-env")

# Ensure the required packages are installed (we need scikit-learn, Azure ML defaults, and Azure ML dataprep)
packages = CondaDependencies.create(conda_packages=['scikit-learn','pip'],
                                    pip_packages=['azureml-defaults','azureml-dataprep[pandas]'])
sklearn_env.python.conda_dependencies = packages

# Specify parameter sampler
ps = RandomParameterSampling(
    {
        # Discrete search space: 5 values of C x 3 values of max_iter = 15 combinations.
        # Hyperdrive samples from these and adds the chosen values as script arguments.
        '--C': choice(100, 10, 5, 1.0, 2),
        '--max_iter' : choice(500, 1000, 5000)
    }
)

# Specify an early-termination policy (see the BanditPolicy documentation for the
# exact meaning of slack_factor, evaluation_interval and delay_evaluation).
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

# Folder holding train.py; create it if missing (no error when it already exists).
experiment_folder = './training'
os.makedirs(experiment_folder, exist_ok=True)

# Create a ScriptRunConfig that runs train.py in the sklearn environment on the cluster
est = ScriptRunConfig(source_directory=experiment_folder,
                      script='train.py',
                      environment=sklearn_env,
                      compute_target=training_cluster)

# Create a HyperDriveConfig using the run config, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=est,
                          hyperparameter_sampling=ps,
                          policy=policy, # Bandit early-termination policy defined above
                          primary_metric_name='Accuracy', # Metric name as logged by train.py
                          primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, # Higher accuracy is better
                          max_total_runs=15, # Restrict the sweep to 15 child runs in total
                          max_concurrent_runs=2)

In [4]:
# Launch the HyperDrive sweep under its own experiment.
# The progress widget for this run is displayed in a later cell via RunDetails.
hd_experiment = Experiment(ws, 'bankmarketing-hyperdrive')
hd_run = hd_experiment.submit(hyperdrive_config)

# AutoML

In [5]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset directly from the public bank-marketing training CSV.
bankmarketing_url = (
    "https://automlsamplenotebookdata.blob.core.windows.net/"
    "automl-sample-notebook-data/bankmarketing_train.csv"
)
ds_tab = TabularDatasetFactory.from_delimited_files(path=bankmarketing_url)


In [6]:
from azureml.train.automl import AutoMLConfig

# AutoML configuration for the raw (uncleaned) dataset.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    'experiment_timeout_minutes': 30,
    'task': 'classification',
    'primary_metric': 'accuracy',
    'label_column_name': 'y',
    'n_cross_validations': 5,
}

automl_config = AutoMLConfig(
    compute_target=training_cluster,
    training_data=ds_tab,
    **automl_settings)


In [7]:
# Submit the AutoML run (raw dataset) under its own experiment.
automl_experiment = Experiment(workspace=ws, name='bankmarketing-automl-sdk')
automl_run = automl_experiment.submit(config=automl_config)

Running on remote.


# With Cleaned Data

In [8]:
from training.train import clean_data
from azureml.core import Workspace, Dataset

# Clean the raw data with train.py's clean_data helper, then publish the result
# as a TabularDataset so AutoML can be run on the cleaned data as well.
x, y = clean_data(ds_tab)
dataframe = x.copy()  # copy so that adding the label column does not modify `x`
dataframe['y'] = y

# Write the prepared table to a local folder; to_csv does not create the folder itself.
local_dir = 'data'
local_path = 'data/prepared.csv'
os.makedirs(local_dir, exist_ok=True)
# index=False: by default to_csv also writes the row index as an extra unnamed
# column, which the dataset would then expose to AutoML as a spurious feature.
dataframe.to_csv(local_path, index=False)

# get the datastore to upload prepared data
datastore = ws.get_default_datastore()

# upload the local folder to the target_path in the datastore;
# overwrite=True so that re-running this cell replaces the previously uploaded file
datastore.upload(src_dir=local_dir, target_path='data', overwrite=True)

# create a dataset referencing the cloud location
dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, ('data/prepared.csv'))])

Uploading an estimated of 1 files
Uploading data/prepared.csv
Uploaded data/prepared.csv, 1 files out of an estimated total of 1
Uploaded 1 files


In [9]:
from azureml.train.automl import AutoMLConfig

# AutoML configuration for the cleaned dataset (same settings as the raw-data run,
# only the training data differs).
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    'experiment_timeout_minutes': 30,
    'task': 'classification',
    'primary_metric': 'accuracy',
    'label_column_name': 'y',
    'n_cross_validations': 5,
}

automl_config = AutoMLConfig(
    compute_target=training_cluster,
    training_data=dataset,
    **automl_settings)

In [10]:
# Submit the AutoML run (cleaned dataset) under a separate experiment.
automl2_experiment = Experiment(workspace=ws, name='bankmarketing-automl2-sdk')
automl2_run = automl2_experiment.submit(config=automl_config)

Running on remote.


# Job Analysis

In [11]:
# Display a live status widget for each submitted run, in submission order.
for _submitted_run in (hd_run, automl_run, automl2_run):
    RunDetails(_submitted_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [12]:
# Block until all three runs have finished, streaming their logs as they go.
for _pending_run in (hd_run, automl_run):
    _pending_run.wait_for_completion(show_output=True)
# Kept as the cell's final expression so its return value is still displayed.
automl2_run.wait_for_completion(show_output=True)

RunId: HD_e92906cf-4705-4360-9e1e-1950ec8f2865
Web View: https://ml.azure.com/experiments/bankmarketing-hyperdrive/runs/HD_e92906cf-4705-4360-9e1e-1950ec8f2865?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-131987/workspaces/quick-starts-ws-131987

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-27T06:18:57.343781][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space<END>\n""<START>[2020-12-27T06:18:57.490626][GENERATOR][INFO]Successfully sampled '2' jobs, they will soon be submitted to the execution target.<END>\n""<START>[2020-12-27T06:18:55.983874][API][INFO]Experiment created<END>\n"<START>[2020-12-27T06:18:58.7810534Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_e92906cf-4705-4360-9e1e-1950ec8f2865
Web View: https://ml.azure.com/experiments/bankmarketing-hyperdrive/runs/HD_e92906cf-4705-4360-9e1e-1950ec8f2865?wsid=/s

{'runId': 'AutoML_474d2532-7f9c-4cc9-b410-18e3d286b7ec',
 'target': 'drao-aml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-27T06:19:47.00326Z',
 'endTimeUtc': '2020-12-27T07:06:08.953871Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'drao-aml-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"b81c4b8a-8c42-49c6-8041-c8259ab6c3a0\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"data/prepared.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-131987\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"a24a24d5-8d87-4c8a-99b6-91ed2d2df51f\\\\\\", \\\\

In [13]:
import joblib
from azureml.core import Model
# List every HyperDrive child run, ordered by the primary metric.
for child_summary in hd_run.get_children_sorted_by_primary_metric():
    print(child_summary)

# Fetch the best child run together with its logged metrics and script arguments.
best_run = hd_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
run_details = best_run.get_details()
script_arguments = run_details['runDefinition']['arguments']
print(f'Best Run Id:  {best_run.id}')
print(f' -Accuracy: {best_run_metrics["Accuracy"]}')
print(f' -Arguments: {script_arguments}')

# Register the best run's model, recording its metrics as model properties.
model_tags = {'Training context':'Hyperdrive'}
model_properties = {
    'AUC': best_run_metrics['AUC'],
    'Accuracy': best_run_metrics['Accuracy'],
}
best_run.register_model(
    model_path='outputs/bankmarketing_model.pkl',
    model_name='bankmarketing_model',
    tags=model_tags,
    properties=model_properties,
)

{'run_id': 'HD_e92906cf-4705-4360-9e1e-1950ec8f2865_1', 'hyperparameters': '{"--C": 5, "--max_iter": 500}', 'best_primary_metric': 0.9123925139099646, 'status': 'Completed'}
{'run_id': 'HD_e92906cf-4705-4360-9e1e-1950ec8f2865_2', 'hyperparameters': '{"--C": 2, "--max_iter": 500}', 'best_primary_metric': 0.9122913505311078, 'status': 'Completed'}
{'run_id': 'HD_e92906cf-4705-4360-9e1e-1950ec8f2865_14', 'hyperparameters': '{"--C": 2, "--max_iter": 5000}', 'best_primary_metric': 0.9117855336368235, 'status': 'Completed'}
{'run_id': 'HD_e92906cf-4705-4360-9e1e-1950ec8f2865_12', 'hyperparameters': '{"--C": 5, "--max_iter": 1000}', 'best_primary_metric': 0.9117855336368235, 'status': 'Completed'}
{'run_id': 'HD_e92906cf-4705-4360-9e1e-1950ec8f2865_11', 'hyperparameters': '{"--C": 5, "--max_iter": 5000}', 'best_primary_metric': 0.9117855336368235, 'status': 'Completed'}
{'run_id': 'HD_e92906cf-4705-4360-9e1e-1950ec8f2865_9', 'hyperparameters': '{"--C": 2, "--max_iter": 1000}', 'best_primary_m

Model(workspace=Workspace.create(name='quick-starts-ws-131987', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-131987'), name=bankmarketing_model, id=bankmarketing_model:1, version=1, tags={'Training context': 'Hyperdrive'}, properties={'AUC': '0.9208803129775196', 'Accuracy': '0.9123925139099646'})

In [14]:
from azureml.core import Model

# Retrieve the best child run and fitted pipeline from the raw-data AutoML run.
best_run, fitted_model = automl_run.get_output()
print(best_run)
print(fitted_model)

# Print every metric logged by the best run.
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

# Register model
model_tags = {'Training context':'Auto ML'}
model_properties = {
    'AUC': best_run_metrics['AUC_weighted'],
    'Accuracy': best_run_metrics['accuracy'],
}
best_run.register_model(
    model_path='outputs/model.pkl',
    model_name='bankmarketing_model_automl',
    tags=model_tags,
    properties=model_properties,
)

Run(Experiment: bankmarketing-automl-sdk,
Id: AutoML_ee1be0b9-cce7-48f6-8fda-50efd5258fbd_21,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                    min_weight_fraction_leaf=0.0,
                                                                                                    n_estimators=10,
       

Model(workspace=Workspace.create(name='quick-starts-ws-131987', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-131987'), name=bankmarketing_model_automl, id=bankmarketing_model_automl:1, version=1, tags={'Training context': 'Auto ML'}, properties={'AUC': '0.9469520811379704', 'Accuracy': '0.9171168437025796'})

In [15]:
from azureml.core import Model

# Retrieve the best child run and fitted pipeline from the cleaned-data AutoML run.
best_run, fitted_model = automl2_run.get_output()
print(best_run)
print(fitted_model)

# Print every metric logged by the best run.
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

# Register model
model_tags = {'Training context':'Auto ML'}
model_properties = {
    'AUC': best_run_metrics['AUC_weighted'],
    'Accuracy': best_run_metrics['accuracy'],
}
best_run.register_model(
    model_path='outputs/model.pkl',
    model_name='bankmarketing_model_automl2',
    tags=model_tags,
    properties=model_properties,
)

Run(Experiment: bankmarketing-automl2-sdk,
Id: AutoML_474d2532-7f9c-4cc9-b410-18e3d286b7ec_0,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('MaxAbsScaler', MaxAbsScaler(copy...
                 LightGBMClassifier(boosting_type='gbdt', class_weight=None,
                                    colsample_bytree=1.0,
                                    importance_type='split', learning_rate=0.1,
                                    ma

Model(workspace=Workspace.create(name='quick-starts-ws-131987', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-131987'), name=bankmarketing_model_automl2, id=bankmarketing_model_automl2:1, version=1, tags={'Training context': 'Auto ML'}, properties={'AUC': '0.9490010319293563', 'Accuracy': '0.9159635811836114'})

In [16]:
# Delete the compute cluster now that all runs have completed and the models are registered.
training_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

