In [1]:
from azureml.core import Workspace, Experiment

# ws = Workspace.get(name="udacity-project")
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-130963
Azure region: southcentralus
Subscription id: ac15aef5-0abe-4be6-a0bd-40abc1594138
Resource group: aml-quickstarts-130963


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

cluster_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found an existing compute target.')
except ComputeTargetException:
    print('No existing compute target found. Creating a new compute target.')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)

    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

print(compute_target.get_status().serialize())

No existing compute target found. Creating a new compute target.
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2020-12-19T07:49:42.574000+00:00', 'errors': None, 'creationTime': '2020-12-19T07:49:38.230253+00:00', 'modifiedTime': '2020-12-19T07:49:54.208468+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [13]:
from azureml.core import ScriptRunConfig, Environment
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, normal, uniform
import os

# Specify parameter sampler
ps = RandomParameterSampling(
     {
        '--C': choice(1,2,4), 
        '--max_iter': choice(10,50,100)
     }
)

# Specify a Policy
policy = BanditPolicy(
           slack_factor = 0.1,
           evaluation_interval = 2
)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create an SKLearn estimator for use with train.py
# The SKLearn feature has been deprecated according to Dec 7 release notes, therefore ScriptRunConfig is used here instead
est = SKLearn(source_directory='./', compute_target=compute_target, entry_script='train.py')
# env = Environment(name="myenv")

# config = ScriptRunConfig(source_directory='./',
#                         script='train.py',
#                         compute_target=compute_target)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator = est,
                             hyperparameter_sampling=ps,
                             policy=policy,
                             primary_metric_name="Accuracy",
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs=40,
                             max_concurrent_runs=4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [14]:
# Submit your hyperdrive run to the experiment and show run details with the widget.
hyperdrive_run = exp.submit(hyperdrive_config)

from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()

hyperdrive_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_adf57c6e-5514-4181-9a33-e557bab9e4e6
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_adf57c6e-5514-4181-9a33-e557bab9e4e6?wsid=/subscriptions/ac15aef5-0abe-4be6-a0bd-40abc1594138/resourcegroups/aml-quickstarts-130963/workspaces/quick-starts-ws-130963

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-19T08:05:27.076100][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-12-19T08:05:27.474188][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n""<START>[2020-12-19T08:05:26.421600][API][INFO]Experiment created<END>\n"<START>[2020-12-19T08:05:28.1352002Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_adf57c6e-5514-4181-9a33-e557bab9e4e6
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_adf57c6e-5514-4181-9a33-e557bab9e4e6?wsid=/subscriptions/ac15a

{'runId': 'HD_adf57c6e-5514-4181-9a33-e557bab9e4e6',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-19T08:05:26.178634Z',
 'endTimeUtc': '2020-12-19T08:15:20.818977Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '12da2258-002f-431e-bdb3-75b434061f69',
  'score': '0.9074355083459787',
  'best_child_run_id': 'HD_adf57c6e-5514-4181-9a33-e557bab9e4e6_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg130963.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_adf57c6e-5514-4181-9a33-e557bab9e4e6/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=B3NBYI75BHPBN28S54%2FNM2o22Y6KnsziTQO0LGMFZGE%3D&st=2020-12-19T08%3A05%3A35Z&se=2020-12-19T16%3A15%3A35Z&sp=r'}}

In [26]:
import joblib
# Get your best run and save the model from that run.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print('Best run:', best_run)
print('Metrics:', best_run_metrics)

os.makedirs('outputs', exist_ok=True)
joblib.dump(value=best_run, filename='outputs/model.pkl')

model = best_run.register_model(model_name='hyperdrive_model', model_path='./outputs/model.pkl')

Best run: Run(Experiment: udacity-project,
Id: HD_adf57c6e-5514-4181-9a33-e557bab9e4e6_3,
Type: azureml.scriptrun,
Status: Completed)
Metrics: {'Regularization Strength:': 2.0, 'Max iterations:': 50, 'Accuracy': 0.9074355083459787}


ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path outputs/model.pkl in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_d6d655e64ee3fac0dd8a144dbcb230d52048da48bfedef175c18936d5f60a927_d.txt', 'azureml-logs/65_job_prep-tvmps_d6d655e64ee3fac0dd8a144dbcb230d52048da48bfedef175c18936d5f60a927_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d6d655e64ee3fac0dd8a144dbcb230d52048da48bfedef175c18936d5f60a927_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/102_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path outputs/model.pkl in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_d6d655e64ee3fac0dd8a144dbcb230d52048da48bfedef175c18936d5f60a927_d.txt', 'azureml-logs/65_job_prep-tvmps_d6d655e64ee3fac0dd8a144dbcb230d52048da48bfedef175c18936d5f60a927_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d6d655e64ee3fac0dd8a144dbcb230d52048da48bfedef175c18936d5f60a927_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/102_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']\n                See https://aka.ms/run-logging for more details."
    }
}

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

data_url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=data_url)


In [None]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

# Split data into train and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=3)

In [None]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes = 30,
    task = 'classification',
    primary_metric = 'accuracy',
    training_data = ds,
    label_column_name = 'y',
    n_cross_validations = 5)

In [2]:
# Submit your automl run

experiment = Experiment(ws, "bank_telemarketing_automl")
print("Experiment created")
# submit(config, tags=None, **kwargs)
run = experiment.submit(config=automl_config, show_output=True)

In [None]:
# Retrieve and save your best automl model
import onnxruntime, onnxmltools

joblib.dump(fitted_model, "fitted_automl_model.joblib")

best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run.get_details()['runDefinition']['arguments'])

print(best_run.get_file_names())
print('Best Rund ID: ', best_run.id)
print('\n Accuracy: ', best_run_metric['accuracy'])

model = best_run.register_model(model_name='automl-bank-telemarketing', model_path='outputs/model.joblib')

# session = onnxruntime.InferenceSession("path to model")

In [None]:
# Delete the cluster instance
azureml.core.compute.AmlCompute.delete()