In [1]:
from azureml.core import Workspace, Experiment

import pandas as pd

In [2]:
ws = Workspace.get(name="quick-starts-ws-119652")
exp = Experiment(workspace=ws, name="automl")

run = exp.start_logging()

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###

cluster_name = 'hd-cluster'

try:
    compute_target = ComputeTarget(ws, cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', min_nodes=1, max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

In [4]:
from azureml.data.dataset_factory import TabularDatasetFactory

path_url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=path_url)

In [5]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
all_data = pd.concat([x,y], axis = 1)

In [12]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    n_cross_validations=5,
    compute_target=compute_target, 
    iterations=100,
max_concurrent_iterations=8,
    max_cores_per_iteration=-1)

In [13]:
# Submit your automl run

### YOUR CODE HERE ###


from azureml.widgets import RunDetails
automl_experiement = exp.submit(automl_config, show_output=True)
RunDetails(automl_experiement).show()
automl_experiement.wait_for_completion(show_output=True)


Running on remote.
Running on remote compute: hd-cluster
Parent Run ID: AutoML_43696938-4190-4a7f-a7a8-055aae3d46bd

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+---------------------------------+---

        59   MaxAbsScaler LightGBM                          0:00:40       0.9119    0.9154
        58   MaxAbsScaler ExtremeRandomTrees                0:00:38       0.9009    0.9154
        60   StandardScalerWrapper XGBoostClassifier        0:00:39       0.9146    0.9154
        62   StandardScalerWrapper LightGBM                 0:00:37       0.9057    0.9154
        65   SparseNormalizer XGBoostClassifier             0:00:41       0.9131    0.9154
        64   MaxAbsScaler LightGBM                          0:00:39       0.9095    0.9154
        68   StandardScalerWrapper XGBoostClassifier        0:00:45       0.9156    0.9156
        66   MaxAbsScaler LightGBM                          0:00:35       0.9076    0.9156
        63   StandardScalerWrapper LightGBM                 0:00:34       0.9100    0.9156
        67   SparseNormalizer XGBoostClassifier             0:01:10       0.9141    0.9156
        69   StandardScalerWrapper LightGBM                 0:00:42       0.9132    0.9156

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+---------------------------------+--------------------------------------+
|Size of the smallest class       |Name/Label of the smallest class |Number of samples in the training data|
|3692                             |yes                              |32950                                 |
+---------------------------------+---------------------------------+--------------------------------------+

********************************************

{'runId': 'AutoML_43696938-4190-4a7f-a7a8-055aae3d46bd',
 'target': 'hd-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-10-03T19:46:59.043147Z',
 'endTimeUtc': '2020-10-03T20:19:39.390248Z',
 'properties': {'num_iterations': '100',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'hd-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"36a74578-67a7-4429-8442-a12fa323a58f\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 1, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\\\\\\", \\\\\\"sas\\\\\\": null, \\\\\\"storageAccountName\\\\\\": null

In [22]:
best_automl_run, best_model = automl_experiement.get_output()

In [23]:
best_automl_run.register_model(model_name = "automl.pkl", model_path = './outputs/')
print(best_model._final_estimator)

PreFittedSoftVotingClassifier(classification_labels=None,
                              estimators=[('91',
                                           Pipeline(memory=None,
                                                    steps=[('standardscalerwrapper',
                                                            <azureml.automl.runtime.shared.model_wrappers.StandardScalerWrapper object at 0x144746320>),
                                                           ('xgboostclassifier',
                                                            XGBoostClassifier(base_score=0.5,
                                                                              booster='gbtree',
                                                                              colsample_bylevel=1,
                                                                              colsample_bynode=1,
                                                                              colsample_bytree=1,
                        

In [None]:
compute_cluster.delete()

<img src='./delete.png'>