# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import logging
import os
import csv
import joblib, pickle

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Print the azureml-core SDK version so the version this notebook ran with
# is recorded in the cell output.
print("SDK version:", azureml.core.VERSION)

SDK version: 1.27.0


In [8]:
# Names shared by later cells: the experiment that runs are logged under and
# the folder passed to AutoML as its project path.
experiment_name = 'customer-churn'
project_folder = './pipeline-project'

# Obtain the workspace via Workspace.from_config() and open the experiment in it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

# Bare expression: show the experiment summary as the cell output.
experiment

Name,Workspace,Report Page,Docs Page
customer-churn,quick-starts-ws-144229,Link to Azure Machine Learning studio,Link to Documentation


## Create or Attach an AmlCompute Cluster

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster.
amlcompute_cluster_name = "capstone-cluster"

try:
    # Look the cluster up by name in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new CPU cluster.
    # For GPU, use vm_size "STANDARD_NC6"; vm_priority='lowpriority' is optional.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait (up to 10 minutes) until at least one node is provisioned.
# For a more detailed view of current AmlCompute status, use get_status().
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=1,
    timeout_in_minutes=10,
)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

### Overview
This notebook is used to predict whether a customer will change telecommunications provider, something known as "churning".

The dataset is part of the Kaggle Customer Churn Prediction 2020 (https://www.kaggle.com/c/customer-churn-prediction-2020/overview).

The training dataset contains 4250 samples. Each sample contains 19 features and 1 boolean variable "churn" which indicates the class of the sample. The 19 input features and 1 target variable are:

"state", string. 2-letter code of the US state of customer residence
"account_length", numerical. Number of months the customer has been with the current telco provider
"area_code", string="area_code_AAA" where AAA = 3 digit area code.
"international_plan", yes/no. Whether the customer has an international plan.
"voice_mail_plan", yes/no. Whether the customer has a voice mail plan.
"number_vmail_messages", numerical. Number of voice-mail messages.
"total_day_minutes", numerical. Total minutes of day calls.
"total_day_calls", numerical. Total number of day calls.
"total_day_charge", numerical. Total charge of day calls.
"total_eve_minutes", numerical. Total minutes of evening calls.
"total_eve_calls", numerical. Total number of evening calls.
"total_eve_charge", numerical. Total charge of evening calls.
"total_night_minutes", numerical. Total minutes of night calls.
"total_night_calls", numerical. Total number of night calls.
"total_night_charge", numerical. Total charge of night calls.
"total_intl_minutes", numerical. Total minutes of international calls.
"total_intl_calls", numerical. Total number of international calls.
"total_intl_charge", numerical. Total charge of international calls
"number_customer_service_calls", numerical. Number of calls to customer service
"churn", yes/no. Customer churn - target variable.



In [3]:
# Load the registered dataset from the workspace.
key = "customer-churn"

# `found` records whether the dataset is registered under `key`.
found = key in ws.datasets.keys()
if not found:
    # Fail here with an actionable message: without this check `dataset`
    # would be unbound and the next statement would raise a NameError.
    raise KeyError(
        f"Dataset '{key}' is not registered in the workspace; "
        "register it before running this notebook."
    )
dataset = ws.datasets[key]

# Materialise the dataset as a pandas DataFrame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,account_length,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls
count,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0,4250.0
mean,100.236235,7.631765,180.2596,99.907294,30.644682,200.173906,100.176471,17.015012,200.527882,99.839529,9.023892,10.256071,4.426353,2.769654,1.559059
std,39.698401,13.439882,54.012373,19.850817,9.182096,50.249518,19.908591,4.271212,50.353548,20.09322,2.265922,2.760102,2.463069,0.745204,1.311434
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,73.0,0.0,143.325,87.0,24.365,165.925,87.0,14.1025,167.225,86.0,7.5225,8.5,3.0,2.3,1.0
50%,100.0,0.0,180.45,100.0,30.68,200.7,100.0,17.06,200.45,100.0,9.02,10.3,4.0,2.78,1.0
75%,127.0,16.0,216.2,113.0,36.75,233.775,114.0,19.8675,234.7,113.0,10.56,12.0,6.0,3.24,2.0
max,243.0,52.0,351.5,165.0,59.76,359.3,170.0,30.54,395.0,175.0,17.77,20.0,20.0,5.4,9.0


In [7]:
# Preview the first five rows of the loaded DataFrame.
df.head(5)

Unnamed: 0,state,account_length,area_code,international_plan,voice_mail_plan,number_vmail_messages,total_day_minutes,total_day_calls,total_day_charge,total_eve_minutes,total_eve_calls,total_eve_charge,total_night_minutes,total_night_calls,total_night_charge,total_intl_minutes,total_intl_calls,total_intl_charge,number_customer_service_calls,churn
0,OH,107,area_code_415,no,yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,no
1,NJ,137,area_code_415,no,no,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,no
2,OH,84,area_code_408,yes,no,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,no
3,OK,75,area_code_415,yes,no,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,no
4,MA,121,area_code_510,no,yes,24,218.2,88,37.09,348.5,108,29.62,212.6,118,9.57,7.5,7,2.03,3,no


## AutoML Configuration

The AutoML experiment times out after 20 minutes and runs at most 5 concurrent iterations. Both parameters can be adjusted (though a deep learning experiment has a limit of 24 hours).
The primary metric is AUC_weighted (area under the curve, weighted); this is the metric I want to optimize, and the best-fit model is chosen based on it. The task is binary classification: predicting whether a customer is going to leave the company or not (given in the label column "churn").
Early stopping is enabled, so the experiment ends early if the score is not improving in the short term.

In [9]:
# Run limits and the metric AutoML optimises.
automl_settings = dict(
    experiment_timeout_minutes=20,
    max_concurrent_iterations=5,
    primary_metric='AUC_weighted',
)

# Classification on the "churn" label column of the loaded dataset, trained
# on the compute cluster, with automatic featurization and early stopping.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="churn",
    path=project_folder,
    featurization='auto',
    enable_early_stopping=True,
    debug_log="automl_errors.log",
    **automl_settings,
)

In [10]:
# Submit the AutoML configuration to the experiment. The returned run handle
# (`remote_run`) is what later cells monitor and query for the best model.
remote_run = experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
customer-churn,AutoML_02a88637-5957-40bb-9d85-ab32cf15d653,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [26]:
from azureml.widgets import RunDetails

# Show the run-details widget for the submitted AutoML run.
RunDetails(remote_run).show()
# Wait for the run to complete, with its output shown in the cell.
remote_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
customer-churn,AutoML_02a88637-5957-40bb-9d85-ab32cf15d653,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  Each iteration of the trained model was validated through cross-validation.
              
DETAILS:      
+---------------------------------+
|Number of folds                  |
|3                                |
+---------------------------------+

****************************************************************************************************

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+-----------------------------

{'runId': 'AutoML_02a88637-5957-40bb-9d85-ab32cf15d653',
 'target': 'capstone-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-05-08T16:49:56.208817Z',
 'endTimeUtc': '2021-05-08T17:09:51.470725Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'capstone-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"4d037514-2225-431d-9742-15b321e38419\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.27.0", "azureml-train": "1.27.0", "azureml-train-restclients-hyperdrive": "1.27.0", "azureml-train-core": "1.27.0", "azureml-train-automl": "1.27.0", "azureml-train-automl-runtime": "1.27.0", "azureml-train-automl-client":

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [28]:
# Retrieve the best child run and its fitted pipeline from the AutoML run.
best_run, fitted_model = remote_run.get_output()
print(best_run)
print(fitted_model)

# Print the run's tags. Calling get_tags() without using its result shows
# nothing, because only a cell's last expression is displayed automatically.
print(best_run.get_tags())

# Keep the metrics dict in `best_run_metrics`; later cells read it.
best_run_metrics = best_run.get_metrics()
for metric_name, metric_value in best_run_metrics.items():
    print(metric_name, metric_value)

Run(Experiment: customer-churn,
Id: AutoML_02a88637-5957-40bb-9d85-ab32cf15d653_50,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                               reg_lambda=1.1458333333333335,
                                                                                               scale_pos_weight=1,
                       

In [29]:
# Display the metrics dictionary of the best run as the cell output.
best_run_metrics

{'weighted_accuracy': 0.9855713811793283,
 'f1_score_macro': 0.8686025560750575,
 'recall_score_micro': 0.9454088237131311,
 'balanced_accuracy': 0.8197294110185475,
 'log_loss': 0.20349933831762965,
 'average_precision_score_macro': 0.921533179694311,
 'precision_score_macro': 0.9485034589902304,
 'AUC_weighted': 0.9241789927412384,
 'precision_score_weighted': 0.9456853752369798,
 'accuracy': 0.9454088237131311,
 'f1_score_weighted': 0.9406941740939398,
 'norm_macro_recall': 0.6394588220370948,
 'average_precision_score_micro': 0.9672032663088771,
 'recall_score_macro': 0.8197294110185475,
 'average_precision_score_weighted': 0.9606744786228449,
 'matthews_correlation': 0.757070898382664,
 'AUC_macro': 0.9241789927412384,
 'precision_score_micro': 0.9454088237131311,
 'AUC_micro': 0.9741089251801157,
 'f1_score_micro': 0.9454088237131311,
 'recall_score_weighted': 0.9454088237131311,
 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_02a88637-5957-40bb-9d85-ab32cf15d653

In [None]:
# Write the fitted model object to 'automl_model.pkl' using joblib.
joblib.dump(fitted_model, 'automl_model.pkl')

## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [30]:
from azureml.core import Model

# Register the best run's model and keep the returned Model in `model`.
# The deployment cell passes `model` to Model.deploy; binding it here makes
# that reference explicit instead of relying on whatever value a loop
# variable of the same name was left holding after the listing below.
model = best_run.register_model(
    model_path='outputs/model.pkl',
    model_name='automl_model',
    tags={'Training context': 'Auto ML'},
    properties={'AUC': best_run_metrics['AUC_weighted'],
                'Accuracy': best_run_metrics['accuracy']},
)

# List registered models with their tags and properties. A distinct loop
# variable is used so the listing does not rebind `model`.
for registered_model in Model.list(ws):
    print(registered_model.name, 'version:', registered_model.version)
    for tag_name, tag in registered_model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in registered_model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')

automl_model version: 1
	 Training context : Auto ML
	 AUC : 0.9241789927412384
	 Accuracy : 0.9454088237131311




In [32]:
# Create the inference configuration for the web service.
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment

# The entry script is "scoring.py" (its contents are not part of this notebook).
# NOTE(review): `Environment` is imported but not used in this cell, and no
# environment is passed to InferenceConfig — confirm this is intentional.
inference_config = InferenceConfig(entry_script="scoring.py")

In [31]:
from azureml.core.webservice import Webservice, AciWebservice
# ACI deployment configuration requesting 1 CPU core and 1 GB of memory.
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb = 1)

In [33]:
# Deploy a web service named "deploy-service" using the inference and
# deployment configurations built in the previous cells.
# `model` must already be bound by an earlier cell to the registered model
# that should be deployed.
service=Model.deploy(workspace=ws,
                    name="deploy-service",
                    models=[model],
                    inference_config=inference_config,
                    deployment_config=deployment_config)
# Wait for the deployment to finish, showing its progress output.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-05-08 19:43:21+00:00 Creating Container Registry if not exists.
2021-05-08 19:43:22+00:00 Use the existing image.
2021-05-08 19:43:23+00:00 Generating deployment configuration.
2021-05-08 19:43:24+00:00 Submitting deployment to compute..
2021-05-08 19:43:32+00:00 Checking the status of deployment deploy-service..
2021-05-08 19:47:39+00:00 Checking the status of inference endpoint deploy-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [34]:
# Keep the scoring endpoint in a variable; the request cell posts to it.
scoring_uri = service.scoring_uri

# Report the service state and its endpoints once each (the URIs were
# previously printed a second time without labels, duplicating the output).
print(f'\nservice state: {service.state}\n')
print(f'scoring URI: \n{scoring_uri}\n')
print(f'swagger URI: \n{service.swagger_uri}\n')


service state: Healthy

scoring URI: 
http://399cd9c7-3b61-4554-9dd5-15dffb1af83d.southcentralus.azurecontainer.io/score

swagger URI: 
http://399cd9c7-3b61-4554-9dd5-15dffb1af83d.southcentralus.azurecontainer.io/swagger.json

http://399cd9c7-3b61-4554-9dd5-15dffb1af83d.southcentralus.azurecontainer.io/score
http://399cd9c7-3b61-4554-9dd5-15dffb1af83d.southcentralus.azurecontainer.io/swagger.json


TODO: In the cell below, send a request to the web service you deployed to test it.

In [40]:
# List the DataFrame's column names (used to build the request payload below).
df.columns

Index(['state', 'account_length', 'area_code', 'international_plan',
       'voice_mail_plan', 'number_vmail_messages', 'total_day_minutes',
       'total_day_calls', 'total_day_charge', 'total_eve_minutes',
       'total_eve_calls', 'total_eve_charge', 'total_night_minutes',
       'total_night_calls', 'total_night_charge', 'total_intl_minutes',
       'total_intl_calls', 'total_intl_charge',
       'number_customer_service_calls', 'churn'],
      dtype='object')

In [42]:

import requests
import json

# One sample customer record: every input column except the "churn" label.
sample_customer = {
    'state': "OH", 'account_length': 100, 'area_code': "area_code_415", 'international_plan': "no",
    'voice_mail_plan': "no", 'number_vmail_messages': 26, 'total_day_minutes': 123.0,
    'total_day_calls': 42, 'total_day_charge': 42.0, 'total_eve_minutes': 42.0,
    'total_eve_calls': 42, 'total_eve_charge': 42.0, 'total_night_minutes': 42.0,
    'total_night_calls': 42, 'total_night_charge': 42.0, 'total_intl_minutes': 42.0,
    'total_intl_calls': 42, 'total_intl_charge': 42.0,
    'number_customer_service_calls': 42,
}

# The same record is sent twice so the request carries a batch of two rows.
data = {"data": [sample_customer, sample_customer]}

input_data = json.dumps(data)

headers = {"Content-Type": "application/json"}

# A timeout keeps the cell from hanging indefinitely if the endpoint is unresponsive.
result = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)
print("Response Code: ", result.status_code)
if result.ok:
    print("Predicted Value: ", result.text)
else:
    # Do not label an HTTP error body as a prediction.
    print("Request failed: ", result.text)

The output is:  "TransformException:\n\tMessage: Must pass 2-d input\n\tInnerException: ValueError: Must pass 2-d input\n\tErrorResponse \n{\n    \"error\": {\n        \"code\": \"SystemError\",\n        \"message\": \"Encountered an internal AutoML error. Error Message/Code: TransformException. Additional Info: TransformException:\\n\\tMessage: Must pass 2-d input\\n\\tInnerException: None\\n\\tErrorResponse \\n{\\n    \\\"error\\\": {\\n        \\\"message\\\": \\\"Must pass 2-d input\\\",\\n        \\\"target\\\": \\\"PipelineWithYTransformations\\\",\\n        \\\"reference_code\\\": \\\"08fdc9b9-6ab8-4c46-9e21-da462e287594\\\"\\n    }\\n}\",\n        \"details_uri\": \"https://docs.microsoft.com/azure/machine-learning/resource-known-issues#automated-machine-learning\",\n        \"target\": \"PipelineWithYTransformations\",\n        \"inner_error\": {\n            \"code\": \"ClientError\",\n            \"inner_error\": {\n                \"code\": \"AutoMLInternal\"\n            }

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Print the logs of the deployed web service.
print(service.get_logs())

In [None]:
# Delete the deployed web service.
service.delete()