# Automated ML

## Import Dependencies

In [1]:
import os
import csv
from pprint import pprint
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.core.compute import ComputeTarget, AmlCompute

from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn

# Report the azureml-core SDK version loaded in this kernel
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.20.0


### 2. Initialize Workspace

In [2]:
# Load the workspace from the local configuration and list its
# name, resource group, location and subscription id, one per line.
ws = Workspace.from_config()
for workspace_field in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_field)

quick-starts-ws-137441
aml-quickstarts-137441
southcentralus
9b72f9e6-56c5-4c16-991b-19c652994860


### 3. Initialize Experiment

In [16]:
# Bind the experiment named 'auto-experiment' in the workspace and display it
ws = Workspace.from_config()
experiment_name = 'auto-experiment'
experiment = Experiment(workspace=ws, name=experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
auto-experiment,quick-starts-ws-137441,Link to Azure Machine Learning studio,Link to Documentation


### 4. Create Compute Cluster

In [19]:
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "automl-cluster"
vm_size='STANDARD_DS12_V2'

# Reuse the cluster when it already exists in the workspace; otherwise provision it.
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # Only a "compute target not found" failure should trigger provisioning;
    # any other error (authentication, network, interrupt) must propagate
    # instead of being mistaken for a missing cluster.
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size, max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

# Can poll for a minimum number of nodes and for a specific timeout.
# If no min node count is provided it uses the scale settings for the cluster.
compute_target.wait_for_completion(show_output=True)

CreatingAmlCompute is getting created. Consider calling wait_for_completion() first


Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### 5. Dataset

In [20]:
# Load the breast cancer dataset from scikit-learn, then print its
# feature-matrix shape, feature names and description.
data = datasets.load_breast_cancer()
for dataset_part in (data.data.shape, data.feature_names, data.DESCR):
    print(dataset_part)

(569, 30)
['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - peri

In [21]:
# Put the features into a DataFrame and append the labels as a 'target' column
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)
df.head(5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [22]:
# Relative frequency of each class label in the target vector
target_labels = pd.Series(data.target)
target_labels.value_counts(normalize=True)

1    0.627417
0    0.372583
dtype: float64

**Our dataset is slightly imbalanced: about 63% of the samples belong to class 1 and about 37% to class 0.**

In [23]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.data.datapath import DataPath

# Register the DataFrame as a TabularDataset, stored under 'datapath'
# on the workspace's default datastore.
def_blob_store = ws.get_default_datastore()
print(f"Default datastore's name: {def_blob_store.name}")
data_path = DataPath(datastore=def_blob_store, path_on_datastore='datapath')
ds = TabularDatasetFactory.register_pandas_dataframe(
    df,
    target=data_path,
    name='UCI_ML_Breast_Cancer',
)

Method register_pandas_dataframe: This is an experimental method, and may change at any time.<br/>For more information, see https://aka.ms/azuremlexperimental.


Default datastore's name: workspaceblobstore
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to datapath/a493237a-46e6-4a52-9a11-849f7021632f/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


## AutoML Configuration:

Our task is `classification`

In [25]:
# Settings forwarded to AutoMLConfig as keyword arguments.
automl_settings = {
    # Upper bound on the total experiment duration, in minutes
    "experiment_timeout_minutes": 50,
    # Kept at the compute cluster's max_nodes (4): extra concurrent
    # iterations would have no node to run on and would only be queued.
    "max_concurrent_iterations": 4,
    "primary_metric" : 'accuracy'}

# Classification task on the registered dataset, predicting the 'target'
# column and training on the remote compute cluster.
automl_config = AutoMLConfig(
    task='classification',
    training_data = ds,
    label_column_name = "target",
    compute_target=compute_target,
    **automl_settings)

## Run Details

In [26]:
# Submit the AutoML configuration to the experiment and keep the returned run handle
remote_run = experiment.submit(automl_config)

# Show the run-details widget for the submitted run
RunDetails(remote_run).show()

Running on remote.


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [27]:
# Wait for the remote run to complete, showing its output in the cell
remote_run.wait_for_completion(show_output=True)


Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS

{'runId': 'AutoML_c836f6e7-80fc-4592-90d9-a834c883e8d0',
 'target': 'automl-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-02-06T11:17:23.103572Z',
 'endTimeUtc': '2021-02-06T12:18:37.355752Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'automl-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"a3fa77c3-6ff4-4d8c-8828-6e35c7c37906\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"datapath/a493237a-46e6-4a52-9a11-849f7021632f/\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-137441\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"9b72f9e6-56c5-4c16-991b

## Best Model

In the cell below, we get the best model from the AutoML experiment and display all the properties of the model.



In [28]:
# Retrieve the best run of the AutoML experiment together with its fitted model.
best_run, fitted_model = remote_run.get_output()

# Print the best run's metrics first, then its details dictionary.
# Each getter is called right before its own print, one after the other.
for label, fetch in (
    ("Best run metrics :", best_run.get_metrics),
    ("Best run details :", best_run.get_details),
):
    print(label, fetch())

Package:azureml-automl-runtime, training version:1.21.0, current version:1.20.0
Package:azureml-core, training version:1.21.0.post1, current version:1.20.0
Package:azureml-dataprep, training version:2.8.2, current version:2.7.3
Package:azureml-dataprep-native, training version:28.0.0, current version:27.0.0
Package:azureml-dataprep-rslex, training version:1.6.0, current version:1.5.0
Package:azureml-dataset-runtime, training version:1.21.0, current version:1.20.0
Package:azureml-defaults, training version:1.21.0, current version:1.20.0
Package:azureml-interpret, training version:1.21.0, current version:1.20.0
Package:azureml-pipeline-core, training version:1.21.0, current version:1.20.0
Package:azureml-telemetry, training version:1.21.0, current version:1.20.0
Package:azureml-train-automl-client, training version:1.21.0, current version:1.20.0
Package:azureml-train-automl-runtime, training version:1.21.0, current version:1.20.0


Best run metrics : {'log_loss': 0.06292871679260245, 'precision_score_weighted': 0.9897473449880956, 'AUC_macro': 0.9970281878347116, 'average_precision_score_weighted': 0.9973091865666263, 'precision_score_micro': 0.9894271075919889, 'AUC_micro': 0.9971423691009127, 'balanced_accuracy': 0.9874710144927537, 'f1_score_weighted': 0.9893925168489656, 'recall_score_weighted': 0.9894271075919889, 'AUC_weighted': 0.9970281878347114, 'weighted_accuracy': 0.9912267036445979, 'accuracy': 0.9894271075919889, 'precision_score_macro': 0.9904316770186335, 'f1_score_macro': 0.988736676906233, 'recall_score_micro': 0.9894271075919889, 'average_precision_score_macro': 0.9971782407478822, 'norm_macro_recall': 0.9749420289855074, 'average_precision_score_micro': 0.9972327130104282, 'matthews_correlation': 0.9778812019283727, 'f1_score_micro': 0.9894271075919889, 'recall_score_macro': 0.9874710144927537, 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_c836f6e7-80fc-4592-90d9-a834c883e8d0_

In [29]:
# Display the final estimator of the fitted pipeline (accessed through a private attribute)
fitted_model._final_estimator

StackEnsembleClassifier(base_learners=[('42',
                                        Pipeline(memory=None,
                                                 steps=[('standardscalerwrapper',
                                                         <azureml.automl.runtime.shared.model_wrappers.StandardScalerWrapper object at 0x7f55596b7c88>),
                                                        ('logisticregression',
                                                         LogisticRegression(C=2222.996482526191,
                                                                            class_weight=None,
                                                                            dual=False,
                                                                            fit_intercept=True,
                                                                            intercept_scaling=1,
                                                                            l1_ratio=None,
              

In [30]:
# Print the text representation of the whole fitted pipeline
print(fitted_model)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('stackensembleclassifier',
                 StackE...
                                         meta_learner=LogisticRegressionCV(Cs=10,
                                                                           class_weight=None,
                                                                           cv=None,
                                                                           dual=False,
                           

In [61]:
# Fetch every metric of the best run and print one "name value" pair per line
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

log_loss 0.06292871679260245
precision_score_weighted 0.9897473449880956
AUC_macro 0.9970281878347116
average_precision_score_weighted 0.9973091865666263
precision_score_micro 0.9894271075919889
AUC_micro 0.9971423691009127
balanced_accuracy 0.9874710144927537
f1_score_weighted 0.9893925168489656
recall_score_weighted 0.9894271075919889
AUC_weighted 0.9970281878347114
weighted_accuracy 0.9912267036445979
accuracy 0.9894271075919889
precision_score_macro 0.9904316770186335
f1_score_macro 0.988736676906233
recall_score_micro 0.9894271075919889
average_precision_score_macro 0.9971782407478822
norm_macro_recall 0.9749420289855074
average_precision_score_micro 0.9972327130104282
matthews_correlation 0.9778812019283727
f1_score_micro 0.9894271075919889
recall_score_macro 0.9874710144927537
confusion_matrix aml://artifactId/ExperimentRun/dcid.AutoML_c836f6e7-80fc-4592-90d9-a834c883e8d0_46/confusion_matrix
accuracy_table aml://artifactId/ExperimentRun/dcid.AutoML_c836f6e7-80fc-4592-90d9-a834c8

In [62]:
# Print detailed parameters of the fitted model
from pprint import pprint
def print_model(model, prefix=""):
    """Print every step of a pipeline-like model along with its parameters.

    For each ``(name, object)`` entry in ``model.steps`` the step name is
    printed behind ``prefix``. A step object that has both ``estimators`` and
    ``weights`` attributes gets a summary of its estimator names and weights,
    and each of its estimators is then printed recursively with
    ``"<estimator name> - "`` as the prefix. Any other step object has the
    result of its ``get_params()`` pretty-printed.

    Args:
        model: Object exposing a ``steps`` sequence of ``(name, object)`` entries.
        prefix: Text placed in front of every step name.
    """
    for step in model.steps:
        step_name = step[0]
        step_obj = step[1]
        print(prefix + step_name)

        is_ensemble = hasattr(step_obj, 'estimators') and hasattr(step_obj, 'weights')
        if not is_ensemble:
            # Plain step: dump its parameters and move on to the next step
            pprint(step_obj.get_params())
            print()
            continue

        # Ensemble step: summarise the members, then descend into each one
        member_names = [member[0] for member in step_obj.estimators]
        pprint({'estimators': member_names, 'weights': step_obj.weights})
        print()
        for member in step_obj.estimators:
            print_model(member[1], member[0] + ' - ')

# Print each step of the fitted model together with its parameters
print_model(fitted_model)

datatransformer
{'enable_dnn': None,
 'enable_feature_sweeping': None,
 'feature_sweeping_config': None,
 'feature_sweeping_timeout': None,
 'featurization_config': None,
 'force_text_dnn': None,
 'is_cross_validation': None,
 'is_onnx_compatible': None,
 'logger': None,
 'observer': None,
 'task': None,
 'working_dir': None}

stackensembleclassifier
{'16': Pipeline(memory=None,
         steps=[('sparsenormalizer',
                 <azureml.automl.runtime.shared.model_wrappers.SparseNormalizer object at 0x7f554fe1bba8>),
                ('xgboostclassifier',
                 XGBoostClassifier(base_score=0.5, booster='gbtree',
                                   colsample_bylevel=1, colsample_bynode=1,
                                   colsample_bytree=0.7, eta=0.2, gamma=0,
                                   learning_rate=0.1, max_delta_step=0,
                                   max_depth=8, max_leaves=255,
                                   min_child_weight=1, missing=nan,
           

## Model Deployment

Remember that you only have to deploy one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

In the cell below, register the model, create an inference config and deploy the model as a web service.

In [71]:
#Save the model
model = best_run.register_model(model_path='outputs/model.pkl', model_name='automl_breast_cancer_predictor',
                                tags={'Training context':'Auto ML'},
                                properties={'Accuracy': best_run_metrics['accuracy']})

print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-137441', subscription_id='9b72f9e6-56c5-4c16-991b-19c652994860', resource_group='aml-quickstarts-137441'), name=automl_breast_cancer_predictor, id=automl_breast_cancer_predictor:1, version=1, tags={'Training context': 'Auto ML'}, properties={'Accuracy': '0.9894271075919889'})


In [72]:
# Download scoring file 
best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'score.py')

# Download environment file
best_run.download_file('outputs/conda_env_v_1_0_0.yml', 'envFile.yml')

In [73]:
from azureml.core.model import InferenceConfig

# Pair the downloaded scoring script with the environment of the best run
scoring_environment = best_run.get_environment()
inference_config = InferenceConfig(
    entry_script='score.py',
    environment=scoring_environment,
)

In [74]:
from azureml.core.webservice import AciWebservice
# ACI deployment configuration requesting 1 CPU core and 1 GB of memory
deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

In [77]:
from azureml.core import Model

# Deploy the registered model as a web service and wait until deployment finishes
service_name = 'breast-cancer-endpoint'
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)

# Service state, scoring URI and swagger URI, one per line
print(service.state, service.scoring_uri, service.swagger_uri, sep='\n')

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..........................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
http://0afeace3-e389-4ef2-8f59-0109b3c9ddd6.southcentralus.azurecontainer.io/score
http://0afeace3-e389-4ef2-8f59-0109b3c9ddd6.southcentralus.azurecontainer.io/swagger.json


In [90]:
# Display the deployed web service object
service

AciWebservice(workspace=Workspace.create(name='quick-starts-ws-137441', subscription_id='9b72f9e6-56c5-4c16-991b-19c652994860', resource_group='aml-quickstarts-137441'), name=breast-cancer-endpoint, image_id=None, compute_type=None, state=ACI, scoring_uri=Healthy, tags=http://0afeace3-e389-4ef2-8f59-0109b3c9ddd6.southcentralus.azurecontainer.io/score, properties={}, created_by={'hasInferenceSchema': 'True', 'hasHttps': 'False'})

In the cell below, send a request to the web service you deployed to test it.

In [91]:
import json

# Draw two rows as a smoke-test payload. random_state pins the sample so that
# re-running the notebook sends the same request and the result is reproducible.
test_df = df.sample(2, random_state=42)
# Take the label column out of the features and keep it aside
label_df = test_df.pop('target')
# JSON payload of the form {"data": [{feature: value, ...}, ...]}
test_sample = json.dumps({'data': test_df.to_dict(orient='records')})
print(test_sample)

{"data": [{"mean radius": 16.65, "mean texture": 21.38, "mean perimeter": 110.0, "mean area": 904.6, "mean smoothness": 0.1121, "mean compactness": 0.1457, "mean concavity": 0.1525, "mean concave points": 0.0917, "mean symmetry": 0.1995, "mean fractal dimension": 0.0633, "radius error": 0.8068, "texture error": 0.9017, "perimeter error": 5.455, "area error": 102.6, "smoothness error": 0.006048, "compactness error": 0.01882, "concavity error": 0.02741, "concave points error": 0.0113, "symmetry error": 0.01468, "fractal dimension error": 0.002801, "worst radius": 26.46, "worst texture": 31.56, "worst perimeter": 177.0, "worst area": 2215.0, "worst smoothness": 0.1805, "worst compactness": 0.3578, "worst concavity": 0.4695, "worst concave points": 0.2095, "worst symmetry": 0.3613, "worst fractal dimension": 0.09564}, {"mean radius": 17.27, "mean texture": 25.42, "mean perimeter": 112.4, "mean area": 928.8, "mean smoothness": 0.08331, "mean compactness": 0.1109, "mean concavity": 0.1204, "

In [93]:
%%time
import requests

# Set the content type
headers = {'Content-type': 'application/json'}

# The timeout keeps the cell from hanging indefinitely if the endpoint stops responding
response = requests.post(service.scoring_uri, data=test_sample, headers=headers, timeout=30)
print("response")
print(response.text)
# Raise on HTTP error statuses (4xx/5xx) instead of silently continuing;
# done after printing so the error body is still visible.
response.raise_for_status()

response
"{\"result\": [0, 0]}"
CPU times: user 5 ms, sys: 360 µs, total: 5.36 ms
Wall time: 117 ms


In the cell below, print the logs of the web service and delete the service

In [87]:
# Print the logs retrieved from the deployed web service
print(service.get_logs())

2021-02-06T13:32:46,330507336+00:00 - rsyslog/run 
2021-02-06T13:32:46,330747364+00:00 - gunicorn/run 
2021-02-06T13:32:46,331029996+00:00 - iot-server/run 
2021-02-06T13:32:46,333378262+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_20a8278aa8b20dd48cc50f56a6d2586c/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_20a8278aa8b20dd48cc50f56a6d2586c/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_20a8278aa8b20dd48cc50f56a6d2586c/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_20a8278aa8b20dd48cc50f56a6d2586c/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_20a8278aa8b20dd48cc50f56a6d2586c/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [94]:
# Delete the deployed web service
service.delete()

In [95]:
# Delete the registered model
model.delete()