In [1]:
from azureml.core import Workspace, Experiment

#Load the existing workspace , create the experiment and start the logging: 
ws = Workspace.get(name="quick-starts-ws-131005")
exp = Experiment(workspace=ws, name="quick-starts-ws-131005")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-131005
Azure region: southcentralus
Subscription id: d4ad7261-832d-46b2-b093-22156001df5b
Resource group: aml-quickstarts-131005


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = "cpu-cluster"

try:
    aml_compute = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS3_V2',
                                                           max_nodes=4)
    aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

aml_compute.wait_for_completion(show_output=True , min_node_count = 1, timeout_in_minutes = 2)

Creating
Succeeded....................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


In [3]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

path_url = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
ds = TabularDatasetFactory.from_delimited_files(path = path_url)

In [4]:
import pandas as pd
ds.to_pandas_dataframe().to_csv("./training_dataset.csv")

In [5]:
#Create an experiment in the default workspace
experiment = Experiment(ws, "auto_ml_BM_exp")

#Get the default datastore for the workspace.
datastore = ws.get_default_datastore()

In [6]:
#Upload the training dataset to the datastore 
datastore.upload(src_dir = "./", target_path = "data/")


Uploading an estimated of 5 files
Uploading ./Automated ML Experiment (v0).ipynb
Uploaded ./Automated ML Experiment (v0).ipynb, 1 files out of an estimated total of 5
Uploading ./training_dataset.csv
Uploaded ./training_dataset.csv, 2 files out of an estimated total of 5
Uploading ./.ipynb_aml_checkpoints/Automated ML Experiment (v0)-checkpoint2020-11-19-15-10-48.ipynb
Uploaded ./.ipynb_aml_checkpoints/Automated ML Experiment (v0)-checkpoint2020-11-19-15-10-48.ipynb, 3 files out of an estimated total of 5
Uploading ./.ipynb_aml_checkpoints/Automated ML Experiment (v0)-checkpoint2020-11-19-15-9-45.ipynb
Uploaded ./.ipynb_aml_checkpoints/Automated ML Experiment (v0)-checkpoint2020-11-19-15-9-45.ipynb, 4 files out of an estimated total of 5
Uploading ./.ipynb_checkpoints/Automated ML Experiment (v0)-checkpoint.ipynb
Uploaded ./.ipynb_checkpoints/Automated ML Experiment (v0)-checkpoint.ipynb, 5 files out of an estimated total of 5
Uploaded 5 files


$AZUREML_DATAREFERENCE_0b0d730a9b7e4731ba4538496917423e

In [7]:
# Upload the training data as a tabular dataset 
training_data = TabularDatasetFactory.from_delimited_files(path = [(datastore, ("data/training_dataset.csv"))])


In [8]:
from azureml.train.automl import AutoMLConfig
# Set parameters for AutoMLConfig


automl_config = AutoMLConfig(
    experiment_timeout_minutes=15,
    task="classification",
    primary_metric="AUC_weighted",
    compute_target=aml_compute,
    training_data=training_data,
    n_cross_validations= 5,
    max_concurrent_iterations= 3,
    label_column_name="y")


In [9]:

# Submit your automl run
auto_ml_run = experiment.submit(config = automl_config, show_output = True)


Running on remote.
No run_configuration provided, running on cpu-cluster with default configuration
Running on remote compute: cpu-cluster
Parent Run ID: AutoML_aa66ce2f-ee15-43ad-b37a-1170628bbad5

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input