# Automated ML


In [1]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.core import Model
import joblib


## Dataset

### Overview

The data contains details of a bank's customers and the target variable is a binary variable reflecting the fact whether the customer left the bank (closed his account) or he continues to be a customer. There are 12 different variables used to predict the churn of a customer, and the data contains around 10,000 such instances.


In [2]:
from azureml.data.dataset_factory import TabularDatasetFactory

data_uri = 'https://raw.githubusercontent.com/himanshu004/AZMLND_Capstone/main/data.csv'
ds = TabularDatasetFactory.from_delimited_files(data_uri)
data = ds.to_pandas_dataframe().dropna()


In [3]:
ws = Workspace.from_config()

experiment_name = 'capstone-automl'

experiment = Experiment(ws, experiment_name)

In [4]:
def cluster_validation():
    cluster = 'capstone-cluster'
    try:
        compute_cluster = ComputeTarget(workspace = ws, name = cluster)
        print('Found existing compute cluster!')
    except ComputeTargetException:
        config = AmlCompute.provisioning_configuration(vm_size = 'STANDARD_D2_V2',max_nodes = 4)
        compute_cluster = ComputeTarget.create(ws, cluster, config)
    compute_cluster.wait_for_completion(show_output = True)
    return compute_cluster


In [5]:
#creates new cluster or returns an existing one
compute_cluster = cluster_validation()


Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## AutoML Configuration

TODO: Explain why you chose the automl settings and cofiguration you used below.

In [6]:
automl_config = AutoMLConfig(
    experiment_timeout_minutes = 30,
    task = 'classification',
    primary_metric = 'accuracy',
    training_data = ds,
    label_column_name = 'Exited',
    n_cross_validations = 7,
    compute_target = compute_cluster,
    iterations = 5)


In [7]:
remote_run = experiment.submit(automl_config,show_output = False)

Running on remote.


## Run Details

As expected, ensembling machine learning models give the best results, with VotingEnsemble giving the highest training accuracy of 0.864.
Other models like, StackEnsemble and XGBoostClassifier also gave almost similar results, but they might have overfitted the data as the data des not have many instances.


In [8]:
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

******************************************************************

{'runId': 'AutoML_6c50ec2c-4b2b-441f-b150-d0f3e0bbc484',
 'target': 'capstone-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-02-01T07:49:51.577948Z',
 'endTimeUtc': '2021-02-01T08:06:29.474259Z',
 'properties': {'num_iterations': '5',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '7',
  'target': 'capstone-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"2ac09348-104b-4b81-8553-e212c14322b4\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/himanshu004/AZMLND_Capstone/main/data.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\\\\\": true, \\\\\\"name\\\\\\": 

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [9]:
best_run, best_model = remote_run.get_output()

print(best_run)

best_run_metrics = best_run.get_metrics()

for metric_name in best_run_metrics:
    metric = best_run_metrics[metric_name]
    print(metric_name, metric)


Run(Experiment: capstone-automl,
Id: AutoML_6c50ec2c-4b2b-441f-b150-d0f3e0bbc484_3,
Type: azureml.scriptrun,
Status: Completed)
precision_score_micro 0.8646991336772637
precision_score_weighted 0.8569431865487146
recall_score_micro 0.8646991336772637
f1_score_macro 0.7543564867047488
precision_score_macro 0.8248612087463675
log_loss 0.33270973744037663
recall_score_weighted 0.8646991336772637
f1_score_weighted 0.8519240843548522
f1_score_micro 0.8646991336772637
balanced_accuracy 0.7211992838419293
norm_macro_recall 0.4423985676838589
AUC_weighted 0.8675683975857131
AUC_macro 0.8675683975857131
AUC_micro 0.9317926384871351
average_precision_score_weighted 0.9051027136742283
matthews_correlation 0.5357916409798847
recall_score_macro 0.7211992838419293
average_precision_score_macro 0.8316066552267328
accuracy 0.8646991336772637
weighted_accuracy 0.933615711348257
average_precision_score_micro 0.9291814951082097
accuracy_table aml://artifactId/ExperimentRun/dcid.AutoML_6c50ec2c-4b2b-441f-

In [10]:
print(best_model)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                min_child_samples=20,
                                                                                                min_child_weight=0.001,
                                                                                                min_split_gain=0.0,
                                      

## Model Deployment

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [13]:
best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'scoring_file_v_1_0_0.py')
best_run.download_file('outputs/conda_env_v_1_0_0.yml', 'conda_env_v_1_0_0.yml')

In [46]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

inference_config = InferenceConfig(entry_script = 'scoring_file_v_1_0_0.py',
                                    environment = best_run.get_environment())

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)
service4= Model.deploy(ws, "capstone-automl-service4", [best_saved_model], inference_config, deployment_config)
service4.wait_for_deployment(show_output = True)

print(service4.state)

print(service4.scoring_uri)

print(service4.swagger_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running............................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
http://6178d79f-1807-4e9e-a7c5-d55de94736dc.southcentralus.azurecontainer.io/score
http://6178d79f-1807-4e9e-a7c5-d55de94736dc.southcentralus.azurecontainer.io/swagger.json


In [23]:
print(ws.webservices)

# Choose the webservice you are interested in

from azureml.core import Webservice

service = Webservice(ws, 'capstone-automl-service2')
print(service.get_logs())

{'capstone-automl-service2': AciWebservice(workspace=Workspace.create(name='quick-starts-ws-136229', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-136229'), name=capstone-automl-service2, image_id=None, compute_type=None, state=ACI, scoring_uri=None, tags=None, properties={}, created_by={'hasInferenceSchema': 'False', 'hasHttps': 'False'}), 'capstone-automl-service': AciWebservice(workspace=Workspace.create(name='quick-starts-ws-136229', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-136229'), name=capstone-automl-service, image_id=None, compute_type=None, state=ACI, scoring_uri=None, tags=None, properties={}, created_by={'hasInferenceSchema': 'False', 'hasHttps': 'False'})}
2021-01-27T10:33:02,659357147+00:00 - iot-server/run 
2021-01-27T10:33:02,660022376+00:00 - rsyslog/run 
2021-01-27T10:33:02,663453127+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/lib

In [29]:
import json

test_data = data.sample(10)
y_test = test_data.pop('Exited')

sample = json.dumps({'data': test_data.to_dict(orient='records')})

print(sample)

{"data": [{"Column1": 8387, "CreditScore": 699, "Gender": 1, "Age": 44, "Tenure": 8, "Balance": 158697.61, "NumOfProducts": 1, "HasCrCard": 1, "IsActiveMember": 0, "EstimatedSalary": 107181.22, "Geography_France": 1, "Geography_Germany": 0, "Geography_Spain": 0}, {"Column1": 3711, "CreditScore": 762, "Gender": 0, "Age": 34, "Tenure": 8, "Balance": 98592.88, "NumOfProducts": 1, "HasCrCard": 0, "IsActiveMember": 1, "EstimatedSalary": 191790.29, "Geography_France": 0, "Geography_Germany": 1, "Geography_Spain": 0}, {"Column1": 9610, "CreditScore": 455, "Gender": 1, "Age": 40, "Tenure": 1, "Balance": 0.0, "NumOfProducts": 3, "HasCrCard": 0, "IsActiveMember": 1, "EstimatedSalary": 129975.34, "Geography_France": 1, "Geography_Germany": 0, "Geography_Spain": 0}, {"Column1": 8546, "CreditScore": 553, "Gender": 1, "Age": 35, "Tenure": 2, "Balance": 158584.28, "NumOfProducts": 2, "HasCrCard": 1, "IsActiveMember": 0, "EstimatedSalary": 43640.16, "Geography_France": 0, "Geography_Germany": 1, "Geog

In [31]:
import requests

headers = {'Content-type': 'application/json'}

response = requests.post(service3.scoring_uri, sample, headers = headers)

In [42]:
print(response.text)
print(y_test)

import numpy as np
y_pred = np.zeros(10)
i = 0
for ch in response.text:
    if(ch == '0' or ch == '1'):
        y_pred[i] = int(ch)
        i += 1

from sklearn.metrics import accuracy_score

print('\nAccuracy Score on Test Data: ',accuracy_score(y_test,y_pred) * 100)

"{\"result\": [0, 0, 1, 0, 0, 0, 1, 0, 0, 0]}"
8387    0
3711    1
9610    0
8546    0
1935    0
4774    0
7962    1
255     0
9742    0
3869    0
Name: Exited, dtype: int64

Accuracy Score on Test Data:  80.0


In [44]:
print(service3.get_logs())

2021-01-27T10:42:55,682363373+00:00 - iot-server/run 
2021-01-27T10:42:55,682363273+00:00 - gunicorn/run 
2021-01-27T10:42:55,681495799+00:00 - rsyslog/run 
2021-01-27T10:42:55,683918506+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [49]:
#deleting all the services created
service.delete()
service2.delete()
service3.delete()
service4.delete()