In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()

experiment = Experiment(workspace=ws, name="train_bankmarketing")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = experiment.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code AAGF7ZCGM to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-131804
Azure region: southcentralus
Subscription id: 976ee174-3882-4721-b90a-b5fef6b72f24
Resource group: aml-quickstarts-131804


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Creating a new compute target...
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2020-12-26T02:17:49.237000+00:00', 'errors': None, 'creationTime': '2020-12-26T02:17:43.468746+00:00', 'modifiedTime': '2020-12-26T02:17:58.843652+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


**Create a project directory**
Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. 
This includes the training script and any additional files your training script depends on.

In [3]:
import os

project_folder = './bankmarketing'
os.makedirs(project_folder, exist_ok=True)

** Copy train.py script to project folder **

In [4]:
import shutil

shutil.copy('train.py', project_folder)

'./bankmarketing/train.py'

**Create an environment**

Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [5]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.2
- scikit-learn
- pip:
  - azureml-defaults

Writing conda_dependencies.yml


In [6]:
from azureml.core import Environment

sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')


** Configure the training job **

Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on.

In [7]:
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder,
                      script='train.py',
                      arguments=['--C', 1, '--max_iter', 100, '--solver', 'lbfgs'],
                      compute_target=compute_target,
                      environment=sklearn_env)

**Submit job**

Run your experiment by submitting your ScriptRunConfig object. Note that this call is asynchronous.

In [8]:
run = experiment.submit(src)

**Monitor your run**

You can monitor the progress of the run with a Jupyter widget. 
Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes.

In [9]:
from azureml.widgets import RunDetails

RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [10]:
run.wait_for_completion(show_output=True)

RunId: train_bankmarketing_1608949082_963fbf22
Web View: https://ml.azure.com/experiments/train_bankmarketing/runs/train_bankmarketing_1608949082_963fbf22?wsid=/subscriptions/976ee174-3882-4721-b90a-b5fef6b72f24/resourcegroups/aml-quickstarts-131804/workspaces/quick-starts-ws-131804

Streaming azureml-logs/20_image_build_log.txt

2020/12/26 02:18:12 Downloading source code...
2020/12/26 02:18:14 Finished downloading source code
2020/12/26 02:18:14 Creating Docker network: acb_default_network, driver: 'bridge'
2020/12/26 02:18:15 Successfully set up Docker network: acb_default_network
2020/12/26 02:18:15 Setting up Docker configuration...
2020/12/26 02:18:15 Successfully set up Docker configuration
2020/12/26 02:18:15 Logging in to registry: 50f2290217d049cb95b75cb25ad226e3.azurecr.io
2020/12/26 02:18:16 Successfully logged into 50f2290217d049cb95b75cb25ad226e3.azurecr.io
2020/12/26 02:18:16 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_

{'runId': 'train_bankmarketing_1608949082_963fbf22',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-26T02:26:32.530825Z',
 'endTimeUtc': '2020-12-26T02:29:07.036187Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'b5754abd-4b62-4ca2-8670-e787a2e5288a',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--C', '1', '--max_iter', '100', '--solver', 'lbfgs'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'cpu-cluster',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'priority': None,
  'credentialPassthrough': False,
  'environment': {'name': 'sklearn-env',
   'version': 'Autosave

In [11]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive.parameter_expressions import choice
import os

# Specify parameter sampler
ps = RandomParameterSampling( {
    "--C": choice(0.01, 0.1, 1, 10, 100),
    "--max_iter": choice(100, 200, 500),
    "--solver": choice('newton-cg', 'lbfgs', 'liblinear'),    
    }
)


# Specify a Policy
# The BanditPolicy basically states to check the job every 2 iterations. 
# If the primary metric (defined later) falls outside of the top 10% range, Azure ML 
# terminate the job. This saves us from continuing to explore hyperparameters that don't 
# show promise of helping reach our target metric.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Create a SKLearn estimator for use with train.py
# est = ### YOUR CODE HERE ###
# I decided not to use the estimator because the documentation (https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.sklearn.sklearn) says that it is deprecated

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=4)

In [12]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

# start the HyperDrive run
hyperdrive_run = experiment.submit(hyperdrive_config)

**Monitor HyperDrive runs**

Monitor the progress of the runs with the following Jupyter widget.

In [13]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [14]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_66632a92-5b45-4d45-9638-226b2e3c9eaa
Web View: https://ml.azure.com/experiments/train_bankmarketing/runs/HD_66632a92-5b45-4d45-9638-226b2e3c9eaa?wsid=/subscriptions/976ee174-3882-4721-b90a-b5fef6b72f24/resourcegroups/aml-quickstarts-131804/workspaces/quick-starts-ws-131804

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-26T02:30:00.822847][API][INFO]Experiment created<END>\n""<START>[2020-12-26T02:30:01.487607][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-12-26T02:30:01.813980][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2020-12-26T02:30:02.4174465Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_66632a92-5b45-4d45-9638-226b2e3c9eaa
Web View: https://ml.azure.com/experiments/train_bankmarketing/runs/HD_66632a92-5b45-4d45-9638-226b2e3c9eaa?wsid=/subscriptio

{'runId': 'HD_66632a92-5b45-4d45-9638-226b2e3c9eaa',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-26T02:30:00.523557Z',
 'endTimeUtc': '2020-12-26T02:43:38.283032Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'b5754abd-4b62-4ca2-8670-e787a2e5288a',
  'score': '0.9117160198638955',
  'best_child_run_id': 'HD_66632a92-5b45-4d45-9638-226b2e3c9eaa_7',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg131804.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_66632a92-5b45-4d45-9638-226b2e3c9eaa/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=sNTu9erPVE6Y0NXhc4hl9k1eY6ww8h8hAP5VF8iv6yE%3D&st=2020-12-26T02%3A33%3A45Z&se=2020-12-26T10%3A43%3A45Z&sp=r'}}

In [15]:
assert(hyperdrive_run.get_status() == "Completed")

In [16]:
import joblib
from sklearn.linear_model import LogisticRegression
from azureml.data.dataset_factory import TabularDatasetFactory
from train import clean_data
from sklearn.model_selection import train_test_split

# Create TabularDataset using TabularDatasetFactory
dataset_path = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path = dataset_path)

x, y = clean_data(ds)

# Split data into train and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y , test_size=0.33, random_state=42)

best_run = hyperdrive_run.get_best_run_by_primary_metric()
arguments = best_run.get_details()['runDefinition']['arguments']
print(arguments)
print(best_run.get_file_names())

model = LogisticRegression(
    C=int(arguments[1]), 
    max_iter=int(arguments[3]), 
    solver=arguments[5])
model.fit(x_train, y_train)
filename = 'best_model.sav'

joblib.dump(model, filename)

['--C', '1', '--max_iter', '100', '--solver', 'lbfgs', '--C', '0.1', '--max_iter', '500', '--solver', 'newton-cg']
['azureml-logs/55_azureml-execution-tvmps_b967fcabd96baccc95b330f48f07a363aa12efc458328ae3c3767bf381b84c1a_d.txt', 'azureml-logs/65_job_prep-tvmps_b967fcabd96baccc95b330f48f07a363aa12efc458328ae3c3767bf381b84c1a_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_b967fcabd96baccc95b330f48f07a363aa12efc458328ae3c3767bf381b84c1a_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/100_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_l_92af3978-3a34-4833-b821-94f409bfca7a.jsonl', 'logs/azureml/dataprep/python_span_l_92af3978-3a34-4833-b821-94f409bfca7a.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


['best_model.sav']

In [17]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

dataset_path = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path = dataset_path)

In [18]:
from train import clean_data
from sklearn.model_selection import train_test_split
import pandas as pd
from azureml.core.dataset import Dataset

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

# Split data into train and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y , test_size=0.33, random_state=42)
training_data=pd.concat([x_train,y_train], axis=1)
testing_data=pd.concat([x_test,y_test], axis=1)


if not os.path.isdir('data'):
    os.mkdir('data')
    
# Save the train data to a csv to be uploaded to the datastore
pd.DataFrame(training_data).to_csv("data/train_data.csv", index=False)
pd.DataFrame(testing_data).to_csv("data/test_data.csv", index=False)

ds = ws.get_default_datastore()
ds.upload(src_dir='./data', target_path='bankmarketing', overwrite=True, show_progress=True)

# Upload the training data as a tabular dataset for access during training on remote compute
train_data = Dataset.Tabular.from_delimited_files(path=ds.path('bankmarketing/train_data.csv'))

test_data = Dataset.Tabular.from_delimited_files(path=ds.path('bankmarketing/test_data.csv'))
label = "y"



Uploading an estimated of 2 files
Uploading ./data/test_data.csv
Uploaded ./data/test_data.csv, 1 files out of an estimated total of 2
Uploading ./data/train_data.csv
Uploaded ./data/train_data.csv, 2 files out of an estimated total of 2
Uploaded 2 files


In [19]:
from azureml.train.automl import AutoMLConfig


# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    compute_target=compute_target,
    enable_onnx_compatible_models=True,
    training_data=train_data,
    validation_data=test_data,
    label_column_name=label)

In [20]:
# Retrieve and save your best automl model.
remote_run = experiment.submit(automl_config, show_output = False)

# Wait for the remote run to complete
remote_run.wait_for_completion()



Running on remote.


{'runId': 'AutoML_7f1c556c-02f5-49b0-a9d1-cb0e58c97283',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-26T02:44:39.917192Z',
 'endTimeUtc': '2020-12-26T03:20:56.209775Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'cpu-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"175e6587-90dc-4444-a033-068deb67da5a\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"bankmarketing/train_data.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-131804\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"976ee174-3882-4721-b90a-b5fef6b72f24\\\\\\", \

In [21]:
best_run_customized, fitted_model_customized = remote_run.get_output()

In [22]:
from azureml.widgets import RunDetails
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [23]:
from azureml.automl.runtime.onnx_convert import OnnxConverter
best_run, onnx_mdl = remote_run.get_output(return_onnx_model=True)
onnx_fl_path = "./best_model.onnx"
OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)

**ComputeTarget Cleanup**

In [25]:
compute_target.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

