# Notebook Setup

In [1]:
## Import libraries
# Standard libraries
import logging
import os
from matplotlib import pyplot as plt
import pandas as pd

# Azure ML SDK libraries
from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.utilities import get_primary_metrics
from azureml.widgets import RunDetails

# Azure ML Setup

In [2]:
# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

# Create an experiment object
experiment_name = 'customer-churn-automl-experiment'
experiment=Experiment(ws, experiment_name)

Ready to use Azure ML 1.0.85 to work with sbazuremlws


# Create / Retrieve Azure ML Training Cluster Compute Target

In [3]:
# Choose a name for your AmlCompute cluster.
amlcompute_cluster_name = "cpu-cluster-1"

found = False
# Check if this compute target already exists in the workspace.
cts = ws.compute_targets
if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'cpu-cluster-1':
    found = True
    print('Found existing compute target.')
    compute_target = cts[amlcompute_cluster_name]
    
if not found:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_DS12_V2", # for GPU, use "STANDARD_NC6"
                                                                #vm_priority = 'lowpriority', # optional
                                                                max_nodes = 6)

    # Create the cluster.
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)
    
print('Checking cluster status...')
# Can poll for a minimum number of nodes and for a specific timeout.
# If no min_node_count is provided, it will use the scale settings for the cluster.
compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)

# For a more detailed view of current AmlCompute status, use get_status().

Creating a new compute target...
Checking cluster status...
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Get metrics for evaluation of classification models
get_primary_metrics('classification')

['precision_score_weighted',
 'norm_macro_recall',
 'average_precision_score_weighted',
 'accuracy',
 'AUC_weighted']

# Configure and Start Auto ML Run

In [5]:
# Create automl config
automl_settings = {
    "primary_metric": 'AUC_weighted',
    "enable_early_stopping": True,
    "max_concurrent_iterations": 5,
    "experiment_timeout_minutes": 60,
    "verbosity": logging.INFO,
    "featurization": "auto"
}

# Load the data
dataset_name = 'customer-churn'

dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)

training_data, validation_data = dataset.random_split(percentage=0.8, seed=42)


label_column_name = "Churn"

automl_config = AutoMLConfig(task = 'classification',
                             debug_log = 'automl_errors.log',
                             compute_target = compute_target,
                             training_data = training_data,
                             validation_data = validation_data,
                             label_column_name = label_column_name,
                             **automl_settings
                            )

In [6]:
# Submit experiment
remote_run = experiment.submit(automl_config, show_output = False)

In [7]:
remote_run.wait_for_completion(show_output=False)

{'runId': 'AutoML_cf018003-5ea5-462b-9f5c-b2ca6c45a854',
 'target': 'cpu-cluster-1',
 'status': 'Completed',
 'startTimeUtc': '2020-04-04T07:04:05.48304Z',
 'endTimeUtc': '2020-04-04T07:46:44.188411Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'cpu-cluster-1',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"e4eb02f7-6a85-44f5-9a8e-b4ac55bdd58b\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"sbazuremldatalake\\\\\\", \\\\\\"path\\\\\\": \\\\\\"/**\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"sbazuremlrg\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"bf088f59-f015-4332-bd36-54b988be7c90\\\\\\", \\\\\\"workspaceName\\\\\\": \\

In [8]:
# Show run details
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…