In [None]:
from azureml.core import Workspace, Experiment

# Look up the Azure ML workspace by name and print its identifying details,
# one per line, so the reader can confirm which workspace is in use.
ws = Workspace.get(name="quick-starts-ws-130655")
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))



In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
cluster_name = "drao-aml-cluster"

try:
    # Reuse the compute target if one with this name already exists in the workspace.
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # No cluster with this name yet: provision one and block until it is ready.
    # Provisioning errors are intentionally NOT caught here. Printing and
    # swallowing them would leave `training_cluster` unbound (or not ready),
    # and every later cell that uses it would fail with a less helpful error.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=1,
        max_nodes=4,
    )
    training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    training_cluster.wait_for_completion(show_output=True)

In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive import choice
import os

# Create a Python environment for the experiment
sklearn_env = Environment("sklearn-env")

# Ensure the required packages are installed (we need scikit-learn, Azure ML defaults, and Azure ML dataprep)
packages = CondaDependencies.create(conda_packages=['scikit-learn','pip'],
                                    pip_packages=['azureml-defaults','azureml-dataprep[pandas]'])
sklearn_env.python.conda_dependencies = packages

# Specify parameter sampler
ps = RandomParameterSampling(
    {
        # Each key is added as a script argument, with a value drawn at random
        # from its choices (3 x 3 = 9 possible combinations).
        '--C': choice(100, 10, 1.0),
        '--max_iter' : choice(500, 1000, 10000)
    }
)

# Specify an early-termination policy for the HyperDrive run
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

# Make sure the script folder exists; exist_ok makes this safe to re-run.
experiment_folder='./training'
os.makedirs(experiment_folder, exist_ok=True)

# Create the run configuration that executes train.py on the training cluster
est = ScriptRunConfig(source_directory=experiment_folder,
                      script='train.py',
                      environment=sklearn_env,
                      compute_target=training_cluster)

# Create a HyperDriveConfig using the run configuration, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy, # Use the early-termination policy defined above (was None, leaving it unused)
                                     primary_metric_name='Accuracy', # Metric name; presumably logged by train.py - verify
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, # Higher accuracy is better
                                     max_total_runs=8, # Restrict the experiment to 8 runs in total
                                     max_concurrent_runs=2)

In [None]:
# Submit the HyperDrive configuration to its own experiment in the workspace.
# The returned run handle is kept in `hd_run` for the cells that follow.
hd_experiment = Experiment(ws, 'bankmarketing-hyperdrive')
hd_run = hd_experiment.submit(hyperdrive_config)



# AutoML

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset from the bank-marketing training CSV at the URL below.
ds_tab = TabularDatasetFactory.from_delimited_files(
    path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
)


In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML configuration: a classification task on the tabular dataset `ds_tab`,
# predicting column 'y' and optimising accuracy, run on the training cluster.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# Running the experiment for longer requires your own Azure tenant, which will
# incur personal costs.
automl_config = AutoMLConfig(
    task='classification',
    primary_metric='accuracy',
    training_data=ds_tab,
    label_column_name='y',
    compute_target=training_cluster,
    experiment_timeout_minutes=30,
)

In [2]:
# Submit the AutoML configuration to its own experiment and keep the run
# handle in `automl_run`.
automl_experiment = Experiment(workspace=ws, name='bankmarketing-automl-sdk')
automl_run = automl_experiment.submit(config=automl_config)


# AutoML with Cleaned Data

In [None]:
from training.train import clean_data
from azureml.core import Workspace, Dataset

import os

# Use the clean_data function to clean your data.
# NOTE(review): assumes clean_data returns pandas objects (features, label) - confirm in training/train.py.
x, y = clean_data(ds_tab)
dataframe = x.copy()
dataframe['y'] = y

# Write the prepared data to a local CSV.
local_path = 'data/prepared.csv'
# to_csv does not create missing parent directories, so make sure 'data/' exists.
os.makedirs(os.path.dirname(local_path), exist_ok=True)
# index=False: otherwise the row index is written as an extra unnamed column,
# which would be read back as a feature when the CSV becomes a training dataset.
dataframe.to_csv(local_path, index=False)

# get the datastore to upload prepared data
datastore = ws.get_default_datastore()

# upload the local file from src_dir to the target_path in datastore;
# overwrite=True so that re-running the cell replaces a previously uploaded file
datastore.upload(src_dir='data', target_path='data', overwrite=True)

# create a dataset referencing the cloud location
dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'data/prepared.csv')])

In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML configuration for the second experiment: same settings as before, but
# trained on the cleaned dataset `dataset` instead of the raw one.
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# Running the experiment for longer requires your own Azure tenant, which will
# incur personal costs.
automl_config = AutoMLConfig(
    task='classification',
    primary_metric='accuracy',
    training_data=dataset,
    label_column_name='y',
    compute_target=training_cluster,
    experiment_timeout_minutes=30,
)

In [None]:
# Submit the cleaned-data AutoML configuration to a separate experiment and
# keep the run handle in `automl2_run`.
automl2_experiment = Experiment(workspace=ws, name='bankmarketing-automl2-sdk')
automl2_run = automl2_experiment.submit(config=automl_config)


# Job Analysis

In [None]:
# The three submitted runs, in the order they are displayed and awaited.
submitted_runs = (hd_run, automl_run, automl2_run)

# Show a status widget for every run first...
for tracked_run in submitted_runs:
    RunDetails(tracked_run).show()

# ...then block on each run in turn, streaming its output.
for tracked_run in submitted_runs:
    tracked_run.wait_for_completion(show_output=True)

In [None]:
import joblib
from azureml.core import Model
# List the HyperDrive child runs, then pick the best one and register its model.
for child_run in hd_run.get_children_sorted_by_primary_metric():
    print(child_run)

# Get the best run, and its metrics and arguments
best_run = hd_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError(
        "HyperDrive produced no best run: no child run reported the primary metric 'Accuracy'."
    )

best_run_metrics = best_run.get_metrics()
script_arguments = best_run.get_details()['runDefinition']['arguments']
print('Best Run Id: ', best_run.id)
print(' -Accuracy:', best_run_metrics['Accuracy'])
print(' -Arguments:', script_arguments)

# Record only the metrics the run actually logged. Whether train.py logs 'AUC'
# is not visible from this notebook, so a missing 'AUC' must not abort
# registration with a KeyError.
model_properties = {
    metric_name: best_run_metrics[metric_name]
    for metric_name in ('AUC', 'Accuracy')
    if metric_name in best_run_metrics
}

# NOTE(review): model_path must match the file train.py writes under outputs/ - confirm.
best_run.register_model(model_path='outputs/bankmarketing_model.pkl', model_name='bankmarketing_model',
                        tags={'Training context':'Hyperdrive'},
                        properties=model_properties)

In [None]:
from azureml.core import Model

# Fetch the best run of the first AutoML experiment together with its fitted model.
best_run, fitted_model = automl_run.get_output()
print(best_run)
print(fitted_model)

# Print every metric recorded on that run.
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

# Register the model, tagging it with its weighted AUC and accuracy.
best_run.register_model(
    model_path='outputs/model.pkl',
    model_name='bankmarketing_model_automl',
    tags={'Training context':'Auto ML'},
    properties={
        'AUC': best_run_metrics['AUC_weighted'],
        'Accuracy': best_run_metrics['accuracy'],
    },
)

In [None]:
from azureml.core import Model

# Fetch the best run of the cleaned-data AutoML experiment together with its fitted model.
best_run, fitted_model = automl2_run.get_output()
print(best_run)
print(fitted_model)

# Print every metric recorded on that run.
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

# Register the model, tagging it with its weighted AUC and accuracy.
best_run.register_model(
    model_path='outputs/model.pkl',
    model_name='bankmarketing_model_automl2',
    tags={'Training context':'Auto ML'},
    properties={
        'AUC': best_run_metrics['AUC_weighted'],
        'Accuracy': best_run_metrics['accuracy'],
    },
)