In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print(
    "Workspace name: " + ws.name,
    "Azure region: " + ws.location,
    "Subscription id: " + ws.subscription_id,
    "Resource group: " + ws.resource_group,
    sep="\n",
)

run = exp.start_logging()


Workspace name: quick-starts-ws-191970
Azure region: southcentralus
Subscription id: a0a76bad-11a1-4a2d-9887-97a29122c8ed
Resource group: aml-quickstarts-191970


In [6]:
from azureml.core.compute import ComputeTarget, AmlCompute

cluster_name = "my-aml-cluster"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
# form lecture #boiler plate

MAX_NODES = 4
VM_SIZE = "Standard_D2_V2"
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing cluster, use it.")
except:
    print(f"Creating ({MAX_NODES} of {VM_SIZE})")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=VM_SIZE, max_nodes=MAX_NODES
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)
print("Init compute resource done")
print(compute_target.get_status())


Creating (4 of Standard_D2_V2)
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
Init compute resource done
<azureml.core.compute.amlcompute.AmlComputeStatus object at 0x7fab80bd0b50>


In [7]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling(
    {
        "--C": uniform(0.01, 10.0),
        "--max_iter": choice(64, 128, 256, 512),
    }
)

# Specify a Policy
# see https://azure.github.io/azureml-sdk-for-r/reference/bandit_policy.html#:~:text=Bandit%20is%20an%20early%20termination,the%20best%20performing%20training%20run.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(
    name="sklearn-env", file_path="conda_dependencies.yml"
)

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(
    source_directory=".",
    script="train.py",
    compute_target=compute_target,
    environment=sklearn_env,
)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    policy=policy,
)  ### YOUR CODE HERE ###


In [8]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hd_run = exp.submit(config=hyperdrive_config)
RunDetails(hd_run).show()
hd_run.wait_for_completion(show_output=True)


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc
Web View: https://ml.azure.com/runs/HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc?wsid=/subscriptions/a0a76bad-11a1-4a2d-9887-97a29122c8ed/resourcegroups/aml-quickstarts-191970/workspaces/quick-starts-ws-191970&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-04-13T11:31:05.252250][API][INFO]Experiment created<END>\n""<START>[2022-04-13T11:31:16.975930][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n"<START>[2022-04-13T11:31:17.4038653Z][SCHEDULER][INFO]Scheduling job, id='HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc_0'<END><START>[2022-04-13T11:31:17.5175470Z][SCHEDULER][INFO]Scheduling job, id='HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc_1'<END><START>[2022-04-13T11:31:17.6259005Z][SCHEDULER][INFO]Scheduling job, id='HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc_2'<END>"<START>[2022-04-13T11:31:17.699590][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to 

{'runId': 'HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc',
 'target': 'my-aml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-13T11:31:04.992952Z',
 'endTimeUtc': '2022-04-13T11:44:15.358667Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'ccbc2b98-6b09-42c6-aac2-a39aee10ebd4',
  'user_agent': 'python/3.8.5 (Linux-5.4.0-1073-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.39.0',
  'space_size': 'infinite_space_size',
  'score': '0.9089529590288316',
  'best_child_run_id': 'HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5fc_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg191970.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_8f8e24c6-381a-4d06-9e33-19e2ebc1c5

In [9]:
import joblib

# Get your best run and save the model from that run.
best_run = hd_run.get_best_run_by_primary_metric()
metrics = best_run.get_metrics()
print(metrics)
print(f"Best Accuracy: {metrics['Accuracy']}")
# save the model
the_model = best_run.register_model("best_hd_model", model_path="./")
the_model


{'Regularization Strength:': 1.2825259388840535, 'Max iterations:': 512, 'Accuracy': 0.9089529590288316}
Best Accuracy: 0.9089529590288316


Model(workspace=Workspace.create(name='quick-starts-ws-191970', subscription_id='a0a76bad-11a1-4a2d-9887-97a29122c8ed', resource_group='aml-quickstarts-191970'), name=best_hd_model, id=best_hd_model:1, version=1, tags={}, properties={})

In [10]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at:
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
# same as in train.py
ds = TabularDatasetFactory.from_delimited_files(
    "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
)


In [11]:
from train import clean_data, TEST_SIZE, LUCKY_NUMBER
from sklearn.model_selection import train_test_split
import pandas as pd

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
# split it as in train.py
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=TEST_SIZE, random_state=LUCKY_NUMBER
)

combine = pd.concat([x_train, y_train], axis=1)


In [12]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=15,
    task="classification",
    primary_metric="accuracy",
    training_data=combine,
    label_column_name="y",
    n_cross_validations=5,
)


In [13]:
# Submit your automl run

### YOUR CODE HERE ###
automl_run = Experiment(workspace=ws, name="udacity-prj-1-automl").submit(automl_config)
automl_run.wait_for_completion(show_output=True)
# show it
RunDetails(automl_run).show()


2022-04-13:11:46:31,279 INFO     [modeling_bert.py:226] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
2022-04-13:11:46:31,321 INFO     [modeling_xlnet.py:339] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-prj-1-automl,AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91,automl,Preparing,Link to Azure Machine Learning studio,Link to Documentation


2022-04-13:12:03:36,287 INFO     [explanation_client.py:334] Using default datastore for uploads


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-prj-1-automl,AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+------------------------------+--------------------------------+--------------------------------------+
|Size of the smallest class    |Name/Label of the smallest class|Number of samples in the training data|
|2951                          |1                               |26360                                 |
+------------------------------+--------------------------------+--------------------------------------+

********************************************************************

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [16]:
# Retrieve and save your best automl model.
# I do not know if it worked from here, since the workspace was deleted before i cloud finish
# all data in the report was taken from the UI. Before i was completly shut out
### YOUR CODE HERE ###
best_automl_run, automl_model = automl_run.get_output()
automl_metrics = best_automl_run.get_metrics()
best_automl_run.register_model(model_name="best_automl_model", model_path=".")


Model(workspace=Workspace.create(name='quick-starts-ws-191970', subscription_id='a0a76bad-11a1-4a2d-9887-97a29122c8ed', resource_group='aml-quickstarts-191970'), name=best_automl_model, id=best_automl_model:2, version=2, tags={}, properties={})

In [17]:
automl_metrics

{'average_precision_score_weighted': 0.9559732765692169,
 'f1_score_macro': 0.7725438695198843,
 'f1_score_weighted': 0.9135487055770154,
 'recall_score_weighted': 0.9176783004552351,
 'matthews_correlation': 0.5503317941842745,
 'norm_macro_recall': 0.4978976746765854,
 'log_loss': 0.2587661076440041,
 'AUC_macro': 0.9478558468505603,
 'AUC_weighted': 0.9478558468505603,
 'f1_score_micro': 0.9176783004552351,
 'weighted_accuracy': 0.9595701023754085,
 'balanced_accuracy': 0.7489488373382927,
 'recall_score_micro': 0.9176783004552351,
 'recall_score_macro': 0.7489488373382927,
 'precision_score_weighted': 0.9115081268013301,
 'average_precision_score_macro': 0.82736950048252,
 'precision_score_micro': 0.9176783004552351,
 'average_precision_score_micro': 0.9816419643505296,
 'AUC_micro': 0.9809706301910512,
 'precision_score_macro': 0.8043361579851211,
 'accuracy': 0.9176783004552351,
 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91_

In [21]:
best_automl_run.__dict__ 

{'_experiment': Experiment(Name: udacity-prj-1-automl,
 Workspace: quick-starts-ws-191970),
 '_run_id': 'AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91_7',
 '_identity': 'Run#AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91_7',
 '_logger': <Logger azureml.Run#AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91_7 (INFO)>,
 '_portal_url': 'https://ml.azure.com',
 '_formatted_tid': '&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254',
 '_workspace_url': 'https://ml.azure.com?wsid=/subscriptions/a0a76bad-11a1-4a2d-9887-97a29122c8ed/resourcegroups/aml-quickstarts-191970/workspaces/quick-starts-ws-191970&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254',
 '_experiment_url': 'https://ml.azure.com/experiments/id/cd008e5d-7288-440b-99fd-80f1d639ce13?wsid=/subscriptions/a0a76bad-11a1-4a2d-9887-97a29122c8ed/resourcegroups/aml-quickstarts-191970/workspaces/quick-starts-ws-191970&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254',
 '_run_details_url': 'https://ml.azure.com/runs/AutoML_b159e052-0d73-4df4-bff6-dcf278b69f91_7?wsid=/subscripti

In [22]:
automl_model

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
                 PreFittedSoftVotingClassifier(classification_labels=array([0, 1]), estimators=[('0', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('lightgbmclassifier', LightGBMClassifier(min_data_in_leaf=20, n_jobs=1, problem_info=ProblemInfo(gpu_training_param_dict={'processing_unit_type': 'cpu'}), random_state=None))], verbose=False)), ('4', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('lightgbmclassifier', LightGBMClassifier(boosting_type='gbdt', colsample_bytree=0.6933333333333332, learning_rate=0.09473736842105263, max_bin=110, max_depth=8,

In [23]:
# delete the cluster
print("Deleting the cluster...")
compute_target.delete()


Deleting the cluster...


In [24]:
compute_target

AmlCompute(workspace=Workspace.create(name='quick-starts-ws-191970', subscription_id='a0a76bad-11a1-4a2d-9887-97a29122c8ed', resource_group='aml-quickstarts-191970'), name=my-aml-cluster, id=/subscriptions/a0a76bad-11a1-4a2d-9887-97a29122c8ed/resourceGroups/aml-quickstarts-191970/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-191970/computes/my-aml-cluster, type=AmlCompute, provisioning_state=Deleting, location=southcentralus, tags=None)

Current provisioning state of AmlCompute is "Deleting"

