In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: udacity-demo
Azure region: centralus
Subscription id: c5da1480-248e-4cc5-ba27-697bc9911ecf
Resource group: udacity-demo


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute

cluster_name = "demo-cluster"

compute_config = AmlCompute.provisioning_configuration(
    vm_size = "STANDARD_D2_V2",
    min_nodes=0,
    max_nodes=4,
)

try:
    clusterhandler = ComputeTarget(workspace=ws, name=cluster_name)
    print("Cluster is existing")
except:
    print("Could not find cluster")
    clusterhandler = ComputeTarget.create(workspace=ws, name=cluster_name, provisioning_configuration=compute_config)
clusterhandler.wait_for_completion()
# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###


Cluster is existing


In [3]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
dataset_link = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
ds = TabularDatasetFactory.from_delimited_files(path=dataset_link)
print(ds)

TabularDataset
{
  "source": [
    "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ]
}


In [4]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

end of clean data


In [5]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    primary_metric="accuracy",
    task='classification',
    experiment_timeout_minutes=20,
    training_data=ds,
    label_column_name='y',
    n_cross_validations=3,
    compute_target=clusterhandler
)
print("done")

done


In [6]:
# Submit your automl run

### YOUR CODE HERE ###
from azureml.widgets import RunDetails
remote_run=exp.submit(automl_config)
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)
print("started running auto ml")

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_afbba018-e7b9-43d7-9ac2-b5478f6836fd,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_afbba018-e7b9-43d7-9ac2-b5478f6836fd,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+-------------------------------------

In [7]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###
bestrun, best_model = remote_run.get_output()
print(bestrun)


Package:azureml-automl-runtime, training version:1.52.0.post1, current version:1.51.0.post1
Package:azureml-core, training version:1.52.0, current version:1.51.0
Package:azureml-dataprep, training version:4.11.4, current version:4.10.8
Package:azureml-dataprep-rslex, training version:2.18.4, current version:2.17.12
Package:azureml-dataset-runtime, training version:1.52.0, current version:1.51.0
Package:azureml-defaults, training version:1.52.0, current version:1.51.0
Package:azureml-interpret, training version:1.52.0, current version:1.51.0
Package:azureml-mlflow, training version:1.52.0, current version:1.51.0
Package:azureml-pipeline-core, training version:1.52.0, current version:1.51.0
Package:azureml-responsibleai, training version:1.52.0, current version:1.51.0
Package:azureml-telemetry, training version:1.52.0, current version:1.51.0
Package:azureml-train-automl-client, training version:1.52.0, current version:1.51.0.post1
Package:azureml-train-automl-runtime, training version:1.

Run(Experiment: udacity-project,
Id: AutoML_afbba018-e7b9-43d7-9ac2-b5478f6836fd_29,
Type: azureml.scriptrun,
Status: Completed)


In [9]:
import joblib

#os.mkdir("model_output")
os.chdir("model_output")
joblib.dump(best_model,"AUTOML.pkl")

['AUTOML.pkl']

In [None]:
clusterhandler.delete()

In [None]:
# Retrieve and save your best automl model.
