In [1]:
from azureml.core import Workspace, Experiment

# Resolve the workspace via Workspace.from_config() and open the experiment
# that every run in this notebook is submitted to.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

# One line per workspace field, emitted with a single print call.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# Start an interactive logging run on the experiment.
# NOTE(review): `run` is rebound when the HyperDrive job is submitted later in
# this notebook, and no visible cell completes this interactive run -- confirm
# whether it is still needed.
run = exp.start_logging()

Workspace name: quick-starts-ws-243947
Azure region: eastus2
Subscription id: cdbe0b43-92a0-4715-838a-f2648cc7ad21
Resource group: aml-quickstarts-243947


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Single source of truth for the cluster name. The original placeholder value
# was never used; the cluster was actually created under this literal.
cluster_name = "my-cluster"

# Provisioning settings required by the exercise:
# vm_size "Standard_D2_V2" and max_nodes no greater than 4.
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                       max_nodes=4)

try:
    # Look the cluster up first so that re-running this cell (Restart & Run All)
    # reuses it instead of trying to provision the same name again.
    my_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target: ' + cluster_name)
except ComputeTargetException:
    # No compute target with that name in the workspace: provision a new one.
    my_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until provisioning has finished so later cells can submit to the cluster.
my_cluster.wait_for_completion(show_output=True)

In [3]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Search space handed to the random sampler; the keys are the command-line
# flags passed to train.py.
param_space = {
    '--C': uniform(0.1, 1.0),
    '--max_iter': choice(50, 100, 200),
}
ps = RandomParameterSampling(param_space)

# Bandit policy with a slack factor of 0.1 and an evaluation interval of 2.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Create ./training unless the working directory already has an entry by that name.
existing_entries = os.listdir()
if "training" not in existing_entries:
    os.mkdir("./training")

# Environment for the training run, built from the conda specification file.
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='conda_dependencies.yml',
)

# Training job definition: run train.py from the current directory on the
# compute cluster, inside the environment defined above.
src = ScriptRunConfig(
    source_directory='.',
    script='train.py',
    environment=sklearn_env,
    compute_target=my_cluster,
)

# HyperDrive sweep: maximise the 'Accuracy' metric over at most 30 runs,
# with up to 4 running concurrently.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=30,
    max_concurrent_runs=4,
)

In [4]:
# Submit the HyperDrive configuration to the experiment.
run = exp.submit(hyperdrive_config)

# Show live progress of the sweep in the notebook widget
# (RunDetails is imported in the HyperDrive configuration cell).
RunDetails(run).show()

# Block until the sweep has finished so that the following cell can query the
# best child run on a fresh Restart & Run All.
run.wait_for_completion(show_output=True)

In [42]:
import joblib
# Get the best HyperDrive child run, report it, and save its model files locally.

results = run.get_best_run_by_primary_metric()
if results is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError("No HyperDrive child run reported the primary metric; "
                       "there is no best run to save.")

print(results.get_metrics())
print(results.get_details()['runId'])

# Download the best run's output artifacts next to the notebook.
# NOTE(review): assumes train.py writes the trained model under the run's
# 'outputs/' folder -- confirm against train.py and adjust the prefix if not.
results.download_files(prefix='outputs/', output_directory='hyperdrive_best_run')


{'Regularization Strength:': 0.10438683161720207, 'Max iterations:': 100, 'Accuracy': 0.9120838697811293}
HD_95c96b4b-1463-4ff2-bac6-d62267d2c976_12


In [7]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Location of the bank-marketing training CSV used for the AutoML part.
bank_marketing_url = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'

# Build a TabularDataset from the delimited file at that URL.
ds = TabularDatasetFactory.from_delimited_files(path=bank_marketing_url)

In [8]:
from train import clean_data

# Run the project's clean_data helper (imported from train.py) on the tabular
# dataset created above. It returns two values, unpacked here as x and y.
# NOTE(review): presumably x holds the features and y the labels -- verify
# against train.py.
x, y = clean_data(ds)

In [9]:
# Build the AutoML training table: a copy of x with y attached as the
# 'output' column; x itself is left unmodified.
# NOTE(review): relies on x being a pandas DataFrame -- confirm against train.py.
training_data = x.assign(output=y)

In [10]:
from azureml.train.automl import AutoMLConfig

# AutoML settings, collected in one place and unpacked into AutoMLConfig below.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes VALUE OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_settings = {
    'experiment_timeout_minutes': 30,
    'task': 'classification',
    'primary_metric': 'accuracy',
    'training_data': training_data,
    'label_column_name': 'output',
    'n_cross_validations': 4,
}

automl_config = AutoMLConfig(**automl_settings)

In [None]:
# Submit the AutoML configuration to the experiment.
run2 = exp.submit(automl_config)

# Wait explicitly for the AutoML run to finish so that the following cell's
# get_output() call does not depend on whether submit() itself blocks.
run2.wait_for_completion(show_output=True)


In [36]:
# Unpack the AutoML run's output into the best run and its fitted model.
best_run, fitted_model = run2.get_output()


In [38]:
# Inspect which fields the best run's details dictionary exposes
# (for example 'runId').
best_run.get_details().keys()

dict_keys(['runId', 'target', 'status', 'startTimeUtc', 'endTimeUtc', 'services', 'properties', 'inputDatasets', 'outputDatasets', 'logFiles', 'submittedBy'])

In [41]:
# Report the metrics logged by the best AutoML run.
automl_metrics = best_run.get_metrics()
print(f"Best run metrics : {automl_metrics}")

# Report the identifier of that run.
automl_run_id = best_run.get_details()['runId']
print(f"Best run ID : {automl_run_id}")

Best run metrics : {'recall_score_micro': 0.9176326918935258, 'matthews_correlation': 0.5635910386162768, 'balanced_accuracy': 0.7651850397029352, 'f1_score_macro': 0.780525905024849, 'average_precision_score_weighted': 0.9556208819760532, 'recall_score_weighted': 0.9176326918935258, 'accuracy': 0.9176326918935258, 'average_precision_score_macro': 0.8255647598905334, 'AUC_macro': 0.9476110021267454, 'weighted_accuracy': 0.9554943590681352, 'average_precision_score_micro': 0.9814980145601426, 'recall_score_macro': 0.7651850397029352, 'AUC_weighted': 0.9476110021267454, 'log_loss': 0.25736424926969287, 'precision_score_weighted': 0.9136090812351355, 'f1_score_micro': 0.9176326918935258, 'norm_macro_recall': 0.5303700794058703, 'precision_score_micro': 0.9176326918935258, 'f1_score_weighted': 0.9151176886057575, 'precision_score_macro': 0.7998303049128115, 'AUC_micro': 0.9807060956846387, 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_2d63726a-c590-4485-b7bd-ccc151c6d99c_

In [None]:
# Save the best AutoML model, then release the compute cluster.
import os
import joblib

# `fitted_model` comes from the run2.get_output() cell; persist it locally so
# it survives the kernel and the cluster deletion below.
os.makedirs('outputs', exist_ok=True)
joblib.dump(fitted_model, 'outputs/automl_model.joblib')

# Delete the cluster last, once nothing else in the notebook needs it.
my_cluster.delete()