In [25]:
# Project 2 of the Udacity Nanodegree - Operationalizing Machine Learning
# The Bank Marketing data will be used - the challenge is to make a determination if the customer will make (or not make) a term deposit
# Gather all imports here
from azureml.core.workspace import Workspace
from azureml.core import Datastore
from azureml.core.compute import ComputeTarget
from azureml.exceptions import ComputeTargetException
from azureml.core.compute.amlcompute import AmlCompute
from azureml.core.experiment import Experiment
from azureml.core.dataset import Dataset
from azureml.core.run import Run

import pandas as pd

from azureml.train.automl.automlconfig import AutoMLConfig

from azureml.pipeline.steps.automl_step import AutoMLStep
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import TrainingOutput
from azureml.pipeline.core.pipeline import Pipeline

from azureml.pipeline.core.run import PipelineRun

from azureml.widgets.run_details import RunDetails

from azureml.core.webservice import Webservice


$Step$ $2$
# Step 2 - Automated ML Experiment

In [19]:
# Project-wide constants referenced by the cells below

# PROJECT_FOLDER is passed to AutoMLConfig as its `path`
PROJECT_FOLDER = 'operationalizing-ml'
PROJECT_DEBUG_LOG = 'operationalizingml.log'
# Name of the label (target) column handed to AutoMLConfig
PROJECT_LABEL_COLUMN_NAME = 'y'
# Backward-compatible alias for the earlier, misspelled constant name
PROJECT_LABEL_COLUN_NAME = PROJECT_LABEL_COLUMN_NAME

PROJECT_AUTOMLSTEP_NAME = 'AutoML Training Step'

PROJECT_EXPERIMENT_NAME_STEP2 = 'exp-project2-step2'

PROJECT_PIPELINEDATA_METRICS_NAME = 'PipelineData_Metrics' # can only contain letters, digits and _
PROJECT_PIPELINEDATA_MODEL_NAME = 'PipelineData_Model' # can only contain letters, digits and _
PROJECT_PIPELINE_OUTPUT_METRICS_NAME = 'Pipeline Metrics Output' # Must be unique in a pipeline
PROJECT_PIPELINE_OUTPUT_MODEL_NAME = 'Pipeline Model Output' # Must be unique in a pipeline
PROJECT_PIPELINE_DESCRIPTION = 'AutoML Pipeline to train model on the marketing bank data'
# Experiment names must be 3-36 characters and may only contain letters, digits,
# underscores and dashes - hence dashes instead of spaces
PROJECT_EXPERIMENT_NAME = 'AutoML-Train-Banking-Data-Experiment'
# Must match the name given to the endpoint when the model was deployed (Step 3)
PROJECT_DEPLOYED_MODEL_NAME = 'Bank Term Deposit Likelihood'

In [20]:
# Load the workspace handle from the local configuration; nearly every later cell uses it
ws = Workspace.from_config()

# Show a short, two-line summary of the workspace as an FYI
print('\n'.join((
    f'name:{ws.name}, location:{ws.location}',
    f'resource group:{ws.resource_group}, subscription id:{ws.subscription_id}',
)))


name:quick-starts-ws-153642, location:southcentralus
resource group:aml-quickstarts-153642, subscription id:cdbe0b43-92a0-4715-838a-f2648cc7ad21


In [21]:
# Retrieve the workspace's default datastore; it is passed below as the `datastore`
# of the PipelineData outputs and as the pipeline's `default_datastore`
ds = ws.get_default_datastore()

In [None]:
# Next, use the compute cluster if it exists, or create it if required, for the ML training
cc_name = "CPU-CC"  # CPU Compute Cluster

# Try to get a handle to the compute cluster. If no cluster of that name exists,
# ComputeTargetException is raised and the cluster is created instead.
try:
    project_cc = ComputeTarget(workspace=ws, name=cc_name)
    print('Compute Cluster target exists and we have a handle to the same')
except ComputeTargetException:
    # Failed to obtain the compute cluster object - in all likelihood a compute
    # cluster of that name has not been created yet, so attempt to create it.

    # Specify the configuration of the compute cluster
    cc_cfg = AmlCompute.provisioning_configuration(vm_size='Standard_DS12_v2', min_nodes=1, max_nodes=6)
    project_cc = ComputeTarget.create(workspace=ws, name=cc_name, provisioning_configuration=cc_cfg)

# At this point we have access to the compute cluster object.
# Wait for the compute target to complete provisioning (show_output takes a boolean, not a string)
project_cc.wait_for_completion(show_output=True)



In [22]:
# Experiment object for Step 2, named by PROJECT_EXPERIMENT_NAME_STEP2;
# the AutoML run further below is submitted to this experiment
proj_experiment_step2 = Experiment(workspace=ws, name=PROJECT_EXPERIMENT_NAME_STEP2)

In [23]:
# Display the experiment using its rich notebook representation (name, workspace, links)
proj_experiment_step2

Name,Workspace,Report Page,Docs Page
exp-project2-step2,quick-starts-ws-153642,Link to Azure Machine Learning studio,Link to Documentation


In [24]:
# Plain-text representation of the experiment
print(proj_experiment_step2)

Experiment(Name: exp-project2-step2,
Workspace: quick-starts-ws-153642)


In [None]:
# Grab the data and create a dataset.
# The logic mirrors the compute target cell above: if the dataset is already
# registered in the workspace, reuse it; otherwise create and register it.
# The Bank Marketing data may be found at - https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv
data_uri = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'

ds_name = 'Bankdata'
# Keep the name -> dataset mapping itself (not just its keys) so it can be indexed below
dsets = ws.datasets

if ds_name in dsets:
    # Dataset exists - reuse the registered one
    proj_ds = dsets[ds_name]
else:
    # Dataset not found. Must create it
    proj_ds = Dataset.Tabular.from_delimited_files(data_uri)
    # Register the dataset so that on repeated runs the data does not have to be fetched every time;
    # keep the registered dataset returned by register()
    proj_ds = proj_ds.register(workspace=ws, name=ds_name, description='Marketing Bank data')

    

In [None]:
# Take a peek at the data by converting the dataset to a Pandas dataframe
proj_df = proj_ds.to_pandas_dataframe()

# Show only the first few rows rather than dumping the entire frame
proj_df.head()

In [None]:
# Show pandas' summary statistics for the dataframe
proj_df.describe()

In [None]:
# Next - prepare for the AutoML experiment
# Essentially, a pipeline is created and then submitted to be run

# Creating the pipeline involves
# * identifying the steps
# * identifying the outputs
#
# Identify the pipeline steps:
# Identifying the pipeline steps in turn involves specifying an AutoMLStep
# An AutoMLStep in turn is associated with the AutoML configuration



In [None]:
# That's where we will start - with specifying an AutoMLConfig.
# The same configuration is submitted directly as an experiment run and is
# also handed to the AutoMLStep of the pipeline further below.
automl_config = AutoMLConfig(
    task='classification',
    path=PROJECT_FOLDER,
    iterations=20,
    primary_metric='AUC_weighted',
    # Compute cluster handle obtained (or created) in the compute cluster cell
    compute_target=project_cc,
    n_cross_validations=3,
    featurization='auto',
    max_concurrent_iterations=5,
    experiment_timeout_hours=0.5,
    enable_early_stopping=True,
    model_explainability=True,
    debug_log=PROJECT_DEBUG_LOG,
    training_data=proj_ds,
    label_column_name=PROJECT_LABEL_COLUMN_NAME,
)

In [None]:
# Hand the AutoML configuration to the Step 2 experiment to start a run
automl_run = proj_experiment_step2.submit(config=automl_config, show_output=True)

# Block until the run has finished, streaming its output while waiting
automl_run.wait_for_completion(show_output=True)


In [None]:
# The AutoMLConfig defined earlier feeds the AutoMLStep, but the step also needs
# its outputs declared before it can be specified.
# Two outputs are declared here, both stored on the default datastore:
#   * metrics_output - the training metrics
#   * model_output   - the trained model
metrics_output = PipelineData(
    training_output=TrainingOutput(type='Metrics'),
    pipeline_output_name=PROJECT_PIPELINE_OUTPUT_METRICS_NAME,
    datastore=ds,
    name=PROJECT_PIPELINEDATA_METRICS_NAME,
)

model_output = PipelineData(
    training_output=TrainingOutput(type='Model'),
    pipeline_output_name=PROJECT_PIPELINE_OUTPUT_MODEL_NAME,
    datastore=ds,
    name=PROJECT_PIPELINEDATA_MODEL_NAME,
)

In [None]:
# Next - build up the AutoMLStep from the AutoML configuration and wire the
# metrics/model PipelineData objects to it through the `outputs` keyword
automl_step = AutoMLStep(
    name=PROJECT_AUTOMLSTEP_NAME,
    automl_config=automl_config,
    outputs=[metrics_output, model_output],
    allow_reuse=True
)

In [None]:
# Now that we have the step and outputs specified - we are ready
# to prime the pipeline

proj_pipeline = Pipeline(
    workspace=ws,
    # `steps` is documented as a list of steps, even when there is only one
    steps=[automl_step],
    description=PROJECT_PIPELINE_DESCRIPTION,
    default_datastore=ds
)


In [None]:
# Training happens when the pipeline is run, which is done by submitting it
# under an experiment name. Any applicable pipeline parameters would also be
# supplied at this point. The object returned by submit is kept so that the
# status of the run can be queried afterwards.
proj_pipeline_run = proj_pipeline.submit(experiment_name=PROJECT_EXPERIMENT_NAME)

In [None]:
# Use the RunDetails widget to get the details regarding the run in real time
# Note that the display is updated asynchronously and control is returned so the subsequent code can run
RunDetails(proj_pipeline_run).show()

In [None]:
# Wait for the pipeline run to complete before we do anything else.
# The wait is kept outside the assert so that it still happens when
# assertions are disabled (python -O); the final status is then checked.
pipeline_run_status = proj_pipeline_run.wait_for_completion(show_output=True)
assert pipeline_run_status == 'Finished', f'Pipeline run ended with status: {pipeline_run_status}'

# An AutoML training Pipeline run on the bank marketing data is completed

$Step$ $2$
# Capture appropriate screenshots

#### This marks the end of Step 2 - Automated ML Experiment of the project

$Step$ $3$
# Step 3 - Deploy the Best Model
Per various knowledge base articles, this is done in the AutoML Studio and not programmatically.
Note that the deployed model will be associated with a name (like 'Bank Term Deposit Likelihood'), and that name is referenced below
to turn on Application Insights.

# Capture appropriate screenshots

##### End of Step 3

$Step$ $4$
# Step 4 - Enable Logging (Application Insights)

In [None]:
# To enable Application Insights on the service (webservice):
# * first access the endpoint using the name assigned at the time of deployment
# * next update webservice parameters such as enabling application insights (enable_app_insights)

proj_webservice = Webservice(
    workspace=ws,
    name=PROJECT_DEPLOYED_MODEL_NAME
)

# Turn on Application Insights for the deployed service
proj_webservice.update(
    enable_app_insights=True
)

# At this point Application Insights (logging) is enabled and can be
# checked in the GUI in AutoML studio


$Step$ $4$
# Next, 
Run the provided logs.py (making the appropriate changes to the code to reference the correct URI)
Ensure logs are displayed

# Take screenshots as required

#### End of Step 4 - Enable Logging (applicationInsights)

# Step 5 - Swagger Documentation

Step 5 is not implemented as part of this notebook. It is executed independently by
running the scripts as requested

# Capture appropriate screenshots

##### End of Step 5 - Swagger Documentation

# Step 6 - Consume Model Endpoints
Step 6 is also not part of this notebook. The project instructions are to be followed and the
independent scripts used to complete this step.

## Capture appropriate screenshots.

#### End of Step 6 - Consume Model endpoints