In [65]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: azuremlspecialization
Azure region: brazilsouth
Subscription id: b7f9dec1-d14e-4b56-82d2-3567170c57d9
Resource group: defaultresourcegroup-eastus


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.exceptions import ComputeTargetException

cluster_name = "project1-compute-cluster"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

In [31]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling(
    {"--C": choice(0.01, 0.1, 1, 10, 100),
     "--max_iter": choice(100, 150, 200, 300)}
)

# Specify a Policy
policy = BanditPolicy(evaluation_interval=1, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')



# Creating an estimator to execute the train.py script
est = SKLearn( 
    source_directory='./',
    compute_target=cpu_cluster,
    entry_script='train.py'
)

# # Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=20,
                                     max_concurrent_runs=4)



In [32]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [34]:
# wait for the run to complete...
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_a6310fee-ecf3-457d-8061-45c23b09c77b
Web View: https://ml.azure.com/runs/HD_a6310fee-ecf3-457d-8061-45c23b09c77b?wsid=/subscriptions/b7f9dec1-d14e-4b56-82d2-3567170c57d9/resourcegroups/defaultresourcegroup-eastus/workspaces/azuremlspecialization&tid=f419c9fe-f7b0-4d87-bee8-e8dfb2190cab

Execution Summary
RunId: HD_a6310fee-ecf3-457d-8061-45c23b09c77b
Web View: https://ml.azure.com/runs/HD_a6310fee-ecf3-457d-8061-45c23b09c77b?wsid=/subscriptions/b7f9dec1-d14e-4b56-82d2-3567170c57d9/resourcegroups/defaultresourcegroup-eastus/workspaces/azuremlspecialization&tid=f419c9fe-f7b0-4d87-bee8-e8dfb2190cab



{'runId': 'HD_a6310fee-ecf3-457d-8061-45c23b09c77b',
 'target': 'project1-compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2023-07-24T17:41:37.724137Z',
 'endTimeUtc': '2023-07-24T17:53:13.733385Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"Accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'e5bb9707-441f-4d3f-874b-938a00b4c998',
  'user_agent': 'python/3.8.5 (Linux-5.15.0-1035-azure-x86_64-with-glibc2.10) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.49.0',
  'space_size': '20',
  'score': '0.9050075872534142',
  'best_child_run_id': 'HD_a6310fee-ecf3-457d-8061-45c23b09c77b_5',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_a6310fee-ecf3-457d-8061-45c23b09c77b_5'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues':

In [36]:
import joblib
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run.get_details()
hyperdrive_run.get_children_sorted_by_primary_metric(top=1)

[{'run_id': 'HD_a6310fee-ecf3-457d-8061-45c23b09c77b_16',
  'hyperparameters': '{"--C": 1, "--max_iter": 100}',
  'best_primary_metric': 0.9050075872534142,
  'status': 'Completed'}]

In [37]:
best_run.get_file_names()

['outputs/model.joblib',
 'system_logs/cs_capability/cs-capability.log',
 'system_logs/hosttools_capability/hosttools-capability.log',
 'system_logs/lifecycler/execution-wrapper.log',
 'system_logs/lifecycler/lifecycler.log',
 'system_logs/metrics_capability/metrics-capability.log',
 'system_logs/snapshot_capability/snapshot-capability.log',
 'user_logs/std_log.txt']



In [41]:
# Downloading model from the run into the workspace
best_run.download_file('outputs/model.joblib')

# registering best model in the workspace
model = best_run.register_model(model_name = 'bank_subscription_hyperdrive', model_path = 'outputs/model.joblib')
print(model.name, model.id, model.version, sep = '\t')

bank_subscription_hyperdrive	bank_subscription_hyperdrive:1	1


In [43]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
dataset = TabularDatasetFactory.from_delimited_files(path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv")

In [49]:
from train import clean_data
import pandas as pd

# Use the clean_data function to clean your data.
x, y = clean_data(dataset)
# concatenating x and y into a single dataset again
cleaned_dataset = pd.concat([x,y], axis=1)

In [57]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=cleaned_dataset,
    label_column_name='y',
    n_cross_validations=5)

In [58]:
# Submit your automl run

### YOUR CODE HERE ###
automl_run = exp.submit(automl_config)
automl_run.wait_for_completion(show_output=True)



Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_4257e258-dc81-4f53-b4b8-b6ea141a0616,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_4257e258-dc81-4f53-b4b8-b6ea141a0616,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+--------------------------------------+
|Size of the smallest class    |Name/Label of the smallest class|Number of samples in the training data|
|3692                          |1                               |32950                                 |
+------------------------------+--------------------------------+--------------------------------------+

********************************************************************

{'runId': 'AutoML_4257e258-dc81-4f53-b4b8-b6ea141a0616',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2023-07-24T18:53:10.959066Z',
 'endTimeUtc': '2023-07-24T19:24:26.504316Z',
 'services': {},
   'message': 'Experiment timeout reached, hence experiment stopped. Current experiment timeout: 0 hour(s) 30 minute(s)'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'local',
  'DataPrepJsonString': None,
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.49.0", "azureml-training-tabular": "1.49.0", "azureml-train": "1.49.0", "azureml-train-restclients-hyperdrive": "1.49.0", "azureml-train-core": "1.49.0", "azureml-train-automl": "1.49.0", "azureml-train-automl



In [67]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###
# Retrieve and save your best automl model.
aml_best_run = automl_run.get_best_child()

aml_best_run.get_details()

{'runId': 'AutoML_4257e258-dc81-4f53-b4b8-b6ea141a0616_12',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2023-07-24T19:23:33.99956Z',
 'endTimeUtc': '2023-07-24T19:23:54.43127Z',
 'services': {},
 'properties': {'runTemplate': 'automl_child',
  'pipeline_id': '__AutoML_Ensemble__',
  'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'accuracy\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'udacity-project\',\'compute_target\':\'local\',\'subscription_id\':\'b7f9dec1-d14e-4b56-82d2-3567170c57d9\',\'region\':\'brazilsouth\',\'spark_service\':None}","ensemble_run_id":"AutoML_4257e258-dc81-4f53-b4b8-b6ea141a0616_12","experiment_name":null,"workspace_name":"azuremlspecialization","subscription_id":"b7f9dec1-d14e-4b56-82d2-3567170c57d9"

In [68]:
# Registering model in workspace
aml_best_run.register_model(model_name='bank_subscription_automl', model_path='outputs/model.pkl')

Model(workspace=Workspace.create(name='azuremlspecialization', subscription_id='b7f9dec1-d14e-4b56-82d2-3567170c57d9', resource_group='defaultresourcegroup-eastus'), name=bank_subscription_automl, id=bank_subscription_automl:1, version=1, tags={}, properties={})

In [69]:
# deleting compute resources that are no longer going to be used.
cpu_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

