# Automated ML

Import Dependencies and Libraries

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Print the installed azureml-core SDK version so it is recorded in the
# notebook output alongside the results.
print("SDK version:", azureml.core.VERSION)

SDK version: 1.28.0


## Initialize Workspace and Experiment


In [2]:
# Connect to the Azure ML workspace described by the saved configuration.
ws = Workspace.from_config()

# Name of the experiment under which the AutoML run is submitted.
experiment_name = 'titanic-survival-prediction'
experiment = Experiment(workspace=ws, name=experiment_name)

# Local folder that is later passed to AutoMLConfig as its `path`.
project_folder = './automl-project'

# Echo the workspace identity, one field per line.
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

personal-workspace
personal
eastus2
8fb18662-8aa6-4db6-8a37-65c1a334f920


## Initialize Compute Target

In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "aml-compute"

# Reuse the cluster if it already exists in the workspace; otherwise provision
# a new single-node cluster.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=1)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    # Provisioning is asynchronous: block until it has finished so the
    # following cells do not continue with a half-provisioned cluster.
    compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.


## Dataset

### Overview
This dataset contains real information about Titanic passengers and whether or not each passenger survived. It was obtained from https://data.world/nrippner/titanic-disaster-dataset

Features:

survived - Survival (0 = No; 1 = Yes)


pclass - Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)


sex - Sex


age - Age


sibsp - Number of Siblings/Spouses Aboard


parch - Number of Parents/Children Aboard


fare - Passenger Fare


cabin - Cabin


embarked - Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)




In [32]:
from azureml.core import Workspace, Dataset

# Look up the registered dataset by name in the workspace.
dataset = Dataset.get_by_name(workspace=ws, name='titanic-survival')

# Convert to pandas only to preview the first rows; the AutoML configuration
# below is given `dataset` itself as its training data.
df = dataset.to_pandas_dataframe()
df.head()

Unnamed: 0,pclass,survived,sex,age,sibsp,parch,fare,cabin,embarked
0,1.0,1.0,female,29.0,0.0,0.0,211.34,B5,S
1,1.0,1.0,male,0.92,1.0,2.0,151.55,C22 C26,S
2,1.0,0.0,female,2.0,1.0,2.0,151.55,C22 C26,S
3,1.0,0.0,male,30.0,1.0,2.0,151.55,C22 C26,S
4,1.0,0.0,female,25.0,1.0,2.0,151.55,C22 C26,S


## AutoML Configuration

The experiment is set to time out after 20 minutes to cut down on runtime and cost.
Since the compute cluster has a maximum of 1 node, the maximum number of concurrent iterations is also set to 1. The primary metric is weighted AUC (`AUC_weighted`), since this is a classification problem with imbalanced classes.
The target column is `survived`. Early stopping is enabled for time and cost efficiency. Featurization is set to `'auto'`, which is the default.

In [8]:
# Run limits and optimisation target, kept in one place so they are easy to tune.
automl_settings = dict(
    experiment_timeout_minutes=20,
    max_concurrent_iterations=1,
    primary_metric='AUC_weighted',
)

# Classification task on the registered dataset, predicting the "survived" column
# on the compute cluster selected above.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="survived",
    path=project_folder,
    enable_early_stopping=True,
    featurization='auto',
    debug_log="automl_errors.log",
    **automl_settings
)

In [9]:
# Submit the AutoML configuration to the experiment. The returned run handle is
# what the later cells use to wait for completion and fetch the best model.
remote_run = experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
titanic-survival-prediction,AutoML_e2f75cf8-aae1-4744-b7a0-46f65cd40677,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

 In the cell below, use the `RunDetails` widget to show the different experiments.

 Note: When I run the widget in this environment, I get network issues that prevent the notebook from being saved until I reload it. I therefore left the cell below unexecuted in this notebook (which is why it shows no output) so that I could continue with the subsequent cells.
 
 I did however run it separately and have displayed the results in the project's readme file.

In [None]:
from azureml.widgets import RunDetails

# Show the interactive run-monitoring widget for the submitted AutoML run.
RunDetails(remote_run).show()
# Wait for the run to complete, with output shown in the cell, before the
# following cells ask it for the best model.
remote_run.wait_for_completion(show_output=True)

## Best Model

Get the best model from the automl experiments and display all the properties of the model.



In [15]:
import joblib

# Retrieve the best run and its fitted model from the AutoML run, then print
# the run, the model pipeline, its metrics and the files it produced.
best_run, fitted_model = remote_run.get_output()
print(best_run,'\n')
print(fitted_model,'\n')
print(best_run.get_metrics(),'\n')
print(best_run.get_file_names(),'\n')

# Save the fitted model inside the project folder. The folder is created first
# because nothing earlier in the notebook guarantees it exists, and the path is
# derived from `project_folder` rather than repeating the literal folder name.
os.makedirs(project_folder, exist_ok=True)
joblib.dump(fitted_model, f'{project_folder}/best_automl_model.joblib')

Package:azureml-automl-runtime, training version:1.30.0, current version:1.28.0.post2
Package:azureml-core, training version:1.30.0, current version:1.28.0
Package:azureml-dataset-runtime, training version:1.30.0, current version:1.28.0
Package:azureml-defaults, training version:1.30.0, current version:1.28.0
Package:azureml-interpret, training version:1.30.0, current version:1.28.0
Package:azureml-mlflow, training version:1.30.0, current version:1.28.0
Package:azureml-pipeline-core, training version:1.30.0, current version:1.28.0
Package:azureml-telemetry, training version:1.30.0, current version:1.28.0
Package:azureml-train-automl-client, training version:1.30.0, current version:1.28.0
Package:azureml-train-automl-runtime, training version:1.30.0, current version:1.28.0


Run(Experiment: titanic-survival-prediction,
Id: AutoML_d1b014e0-0b3e-4350-bba6-843170b7786e_21,
Type: azureml.scriptrun,
Status: Completed) 

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
), random_state=0, reg_alpha=1.1458333333333335, reg_lambda=1.3541666666666667, subsample=0.6, tree_method='auto'))], verbose=False)), ('17', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('extratreesclassifier', ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=None, max_features='log2', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min

['./automl-project/best_automl_model.joblib']

In [16]:
# Load the best model back from the joblib file written in the previous cell;
# the bare expression displays the loaded pipeline.
# NOTE: joblib.load unpickles the file, so only use it on files you trust.

joblib.load('./automl-project/best_automl_model.joblib')

PipelineWithYTransformations(Pipeline={'memory': None,
                                       'steps': [('datatransformer',
                                                  DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mn...
), random_state=0, reg_alpha=1.1458333333333335, reg_lambda=1.3541666666666667, subsample=0.6, tree_method='auto'))], verbose=False)), ('17', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('extratreesclassifier', ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=None, max_features='log2', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=0.01, min_samples_split=0.01, min_weight_fraction_leaf=0.0, n_es

In [17]:
# Download the best run's conda environment file from its outputs and save it
# locally as my_env.yml (loaded below to build the deployment environment).
best_run.download_file('outputs/conda_env_v_1_0_0.yml','my_env.yml')

In [20]:
# Download the best run's scoring script from its outputs and save it locally
# as score.py, the entry script used for deployment below.
best_run.download_file('outputs/scoring_file_v_2_0_0.py','score.py')

## Model Deployment

In the cells below, register the model, create an inference config and deploy the model as a web service.

In [21]:
# Register the best run's model file (outputs/model.pkl) in the workspace
# under the name 'best_automl_model'.
model = best_run.register_model(model_name='best_automl_model',
                           model_path='outputs/model.pkl')
# Print the registered model's name, id and version, tab-separated.
print(model.name, model.id, model.version, sep='\t')

best_automl_model	best_automl_model:3	3


In [23]:
# Build the deployment environment from the conda specification file
# ./my_env.yml that was downloaded from the best run.
from azureml.core import Environment
env = Environment.from_conda_specification(name='myenv', file_path='./my_env.yml')

In [27]:
# Deploy best model as an ACI webservice
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

# Pair the downloaded scoring script with the environment built from my_env.yml.
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# ACI deployment settings: authentication and Application Insights both enabled.
deployment_config = AciWebservice.deploy_configuration(
    auth_enabled=True,
    enable_app_insights=True,
)

# Deploy the registered model as the web service "automl-deploy".
service = Model.deploy(
    workspace=ws,
    name="automl-deploy",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)

# Wait for the deployment to finish, showing progress, then report its state.
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-06-12 21:26:22+00:00 Creating Container Registry if not exists..
2021-06-12 21:26:32+00:00 Registering the environment.
2021-06-12 21:26:33+00:00 Building image..
2021-06-12 21:41:35+00:00 Generating deployment configuration.
2021-06-12 21:41:36+00:00 Submitting deployment to compute..
2021-06-12 21:41:40+00:00 Checking the status of deployment automl-deploy..
2021-06-12 21:45:05+00:00 Checking the status of inference endpoint automl-deploy.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


## Consume Deployed Model
In the cell below, send a request to the web service you deployed to test it.

In [55]:
import urllib.request
import urllib.error
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable HTTPS certificate verification for this process.

    When `allowed` is truthy, the PYTHONHTTPSVERIFY environment variable is
    unset/empty, and the ssl module provides `_create_unverified_context`,
    the default HTTPS context factory is replaced by the unverified one.

    WARNING: this is a process-wide change that turns off server certificate
    checks for every later HTTPS request; only use it against a scoring
    service that presents a self-signed certificate.
    """
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request payload: two sample passengers using the dataset's feature columns
# (everything except the 'survived' label).
data = {
    "Inputs": {
        "data":
        [
            {
                'pclass': 1,
                'sex': "female",
                'age': 29,
                'sibsp': 0,
                'parch': 0,
                'fare': 211,
                'cabin': "B5",
                'embarked': "S",
            }, 
            {
                'pclass': 3,
                'sex': "male",
                'age': 29,
                'sibsp': 1,
                'parch': 1,
                'fare': 211,
                'cabin': "B5",
                'embarked': "S",
            }
        ],
    }
}

body = json.dumps(data).encode('utf-8')

# Ask the deployed service for its scoring endpoint instead of hard-coding the
# URL, which changes every time the service is redeployed.
url = service.scoring_uri

# The service was deployed with authentication enabled, so send the primary key.
primary, _ = service.get_keys()
api_key = primary

headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # Use a context manager so the HTTP response is always closed.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    error_body = error.read().decode("utf8", 'ignore')
    try:
        print(json.loads(error_body))
    except ValueError:
        # The error body is not valid JSON; show it as plain text instead of
        # raising a second exception from inside the handler.
        print(error_body)

b'[1.0, 0.0]'


## View Logs and Clean Up
In the cell below, print the logs of the web service and delete the service

In [56]:
# Print the logs returned by the deployed web service for inspection.
print(service.get_logs())

2021-06-12T21:44:54,540977087+00:00 - rsyslog/run 
2021-06-12T21:44:54,640395887+00:00 - gunicorn/run 
2021-06-12T21:44:54,640394087+00:00 - iot-server/run 
2021-06-12T21:44:54,741701111+00:00 - nginx/run 
rsyslogd: /azureml-envs/azureml_844426f9a15fdc0e079dc03e91d975e1/lib/libuuid.so.1: no version information available (required by rsyslogd)
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-06-12T21:44:57,340704960+00:00 - iot-server/finish 1 0
2021-06-12T21:44:57,343196792+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (13)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 42
SPARK_HOME not set. Skipping PySpark Initialization.
Generating new fontManager, this may take some time...
Initializing logger
2021-06-12 21:45:13,346 | root | INFO | Starting up app insights client
2021-06-12 21:45:13,347 | root | INFO | Starting up request id generator
2021-06-12 21:45

In [61]:
# Delete the deployed web service now that it has been tested.
service.delete()

No service with name automl-deploy found to delete.


In [None]:
# Delete the compute cluster used for the AutoML run as the final clean-up step.
compute_target.delete()