

# Manual setup of AML Experiment and service endpoint 

In [2]:

import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Report the installed azureml-core SDK version so the results below can be tied to it
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.27.0


## Initialize workspace

In [3]:
# Obtain the workspace handle via Workspace.from_config() and echo its
# identifying fields, one per line.
ws = Workspace.from_config()
for workspace_field in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_field)

quick-starts-ws-144210
aml-quickstarts-144210
southcentralus
2c48c51c-bd47-40d4-abbe-fb8eabd19c8c


## Create or pick experiment

In [4]:
# Names for the run history container in the workspace and the project folder.
# NOTE: update these to match your existing experiment name
experiment_name = "ml-experiment-1"
project_folder = "./pipeline-project"

# Bind to the named experiment in the workspace; the bare expression on the
# last line displays it as the cell output.
experiment = Experiment(workspace=ws, name=experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
ml-experiment-1,quick-starts-ws-144210,Link to Azure Machine Learning studio,Link to Documentation


## Create compute target

In [5]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Name of the CPU cluster to reuse or create
amlcompute_cluster_name = "auto-ml"

# Reuse the cluster when the lookup succeeds; otherwise provision a new one.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # For GPU, use vm_size="STANDARD_NC6"; vm_priority="lowpriority" is optional.
    # NOTE(review): min_nodes == max_nodes requests a fixed-size cluster, so it
    # presumably never scales down while idle -- confirm this is intended.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        min_nodes=4,
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=amlcompute_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print("Found existing cluster, use it.")

# Wait for provisioning, requesting at least one node and a 10 minute timeout.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=1,
    timeout_in_minutes=10,
)
# For a more detailed view of current AmlCompute status, use get_status().

Creating...
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded.....................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Get data

In [6]:
# Reuse the dataset registered in the Workspace under `key` when present;
# otherwise build it from the public CSV and register it.
# NOTE: update the key to match the dataset name
key = "Bank-marketing" #"BankMarketing Dataset"
description_text = "Bank Marketing DataSet for Udacity Course 2"

found = key in ws.datasets.keys()
if found:
    dataset = ws.datasets[key]
else:
    # Create the AML tabular dataset from the remote file and register it
    # into the Workspace under `key`.
    example_data = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
    dataset = Dataset.Tabular.from_delimited_files(path=example_data).register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Materialize as a pandas DataFrame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
count,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0
mean,40.040212,257.335205,2.56173,962.17478,0.17478,0.076228,93.574243,-40.51868,3.615654,5166.859608
std,10.432313,257.3317,2.763646,187.646785,0.496503,1.572242,0.578636,4.623004,1.735748,72.208448
min,17.0,0.0,1.0,0.0,0.0,-3.4,92.201,-50.8,0.634,4963.6
25%,32.0,102.0,1.0,999.0,0.0,-1.8,93.075,-42.7,1.344,5099.1
50%,38.0,179.0,2.0,999.0,0.0,1.1,93.749,-41.8,4.857,5191.0
75%,47.0,318.0,3.0,999.0,0.0,1.4,93.994,-36.4,4.961,5228.1
max,98.0,4918.0,56.0,999.0,7.0,1.4,94.767,-26.9,5.045,5228.1


## Check data

In [7]:

# Preview the first five records of the dataset as a pandas DataFrame
first_rows = dataset.take(5)
first_rows.to_pandas_dataframe()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no
2,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,...,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no
3,36,admin.,married,high.school,no,no,no,telephone,jun,fri,...,4,999,0,nonexistent,1.4,94.465,-41.8,4.967,5228.1,no
4,27,housemaid,married,high.school,no,yes,no,cellular,jul,fri,...,2,999,0,nonexistent,1.4,93.918,-42.7,4.963,5228.1,no


## Auto ML Config

In [8]:
# Run limits and the metric AutoML optimizes; unpacked into AutoMLConfig below.
automl_settings = dict(
    experiment_timeout_minutes=15,
    max_concurrent_iterations=4,
    primary_metric="AUC_weighted",
)

# Classification on the "y" label column of the dataset, executed on the
# compute target, with files placed under `project_folder`.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="y",
    path=project_folder,
    featurization="auto",
    enable_early_stopping=True,
    enable_onnx_compatible_models=True,
    model_explainability=True,
    debug_log="automl_errors.log",
    **automl_settings,
)

## Auto ML Run

In [9]:

# Submit the AutoML configuration as a run of a dedicated experiment;
# show_output=True prints the run's progress into the cell output.
# `Experiment` is already imported in the notebook's first code cell.
# NOTE(review): this is a different experiment from the `experiment` object
# created earlier in the notebook -- confirm that is intended.
gy_automl_experiment = Experiment(workspace=ws, name="bankmarketing-experiment-gy")
automl_run1 = gy_automl_experiment.submit(config=automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on auto-ml with default configuration
Running on remote compute: auto-ml


Experiment,Id,Type,Status,Details Page,Docs Page
bankmarketing-experiment-gy,AutoML_2a671a7c-1d23-45bc-abda-4c0d70e75ada,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Train-Test data split
STATUS:       DONE
DESCRIPTION:  Your input data has been split into a training dataset and a holdout test dataset for validation of the model. The test holdout dataset reflects the original distribution of your input data.
              
DETAILS:      
+---------------------------------+---------------------------------+---------------------------------+
|Dataset                          |Row counts                       |Percentage                       |
|train                            |29655                            |90.0           

## Proof of completion

In [10]:
# Fetch the experiment's run history and display its first entry; the Status
# column of that entry serves as the evidence that the run finished.
# NOTE(review): presumably get_runs() yields the most recent run first --
# confirm against the SDK documentation.
rns = gy_automl_experiment.get_runs()
next(rns)


Experiment,Id,Type,Status,Details Page,Docs Page
bankmarketing-experiment-gy,AutoML_2a671a7c-1d23-45bc-abda-4c0d70e75ada,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation


## Get best model

In [11]:
# Retrieve the best child run of the AutoML parent run and report its ID
best_auto_run = automl_run1.get_best_child()
print(f"Best run ID: {best_auto_run.id}")

# ID observed in the recorded run: AutoML_2a671a7c-1d23-45bc-abda-4c0d70e75ada_49

Best run ID: AutoML_2a671a7c-1d23-45bc-abda-4c0d70e75ada_49


## Enable Application Insights on deployed model

In [12]:
# Enable Application Insights on the web service named below
from azureml.core import Webservice

# Name of the deployed service to look up in the workspace
name = "bankmarketing-model-dply3"
service = Webservice(workspace=ws, name=name)

# Update the service so that Application Insights is enabled
service.update(enable_app_insights=True)