# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.19.0


## Initialize Workspace

In [2]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code ERMWEEEX9 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
AZML
RG-AZML
eastus
a748a023-896b-4719-8f8e-3d1ba62d6e35


## Create an Azure ML experiment


In [3]:
experiment_name = 'ideb-experiment'
project_folder = './automl-ideb'

experiment=Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
ideb-experiment,AZML,Link to Azure Machine Learning studio,Link to Documentation


## Create or Attach an AmlCompute cluster

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "auto-ml"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

Creating
Succeeded..............................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## Dataset


In [8]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset

subscription_id = 'a748a023-896b-4719-8f8e-3d1ba62d6e35'
resource_group = 'RG-AZML'
workspace_name = 'AZML'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='dataset_ideb')
dataset.to_pandas_dataframe()

Unnamed: 0,Sigla_UF,Cod_Municipio,Nome_Municipio,Cod_Escola,Nome_Escola,Column6,TAprov2019_Total,TAprov2019_1_serie,TAprov2019_2_serie,TAprov2019_3_serie,TAprov2019_4_serie,Indicador_Rendimento,SAEB2019_Matematica,SAEB2019_Lingua Portuguesa,SAEB2019_Nota Media Padronizada,IDEB_2019
0,BA,2914703.0,Itaberaba,29083524.0,EE - COLEGIO ESTADUAL LIBERDADE,Estadual,43.3,33.70,37.30,69.8,-,0.42,217.25,226.70,3.13,1.3
1,AM,1300060.0,Amaturá,13103431.0,ESC EST INDIGENA MANUEL JOAQUIM SALDANHA FILHO...,Estadual,67.3,59.10,53.50,92.5,-,0.65,195.40,188.21,2.25,1.5
2,PA,1506708.0,Santana do Araguaia,15171582.0,EEEM JORCELI SESTARI SILVA ANEXO IV,Estadual,57.1,52.40,50.00,72.2,-,0.57,202.24,211.64,2.70,1.5
3,PI,2211001.0,Teresina,22128441.0,U E NOSSA SENHORA DO PERPETUO SOCORRO,Estadual,53.8,25.00,58.30,97.5,-,0.45,239.39,237.57,3.61,1.6
4,PI,2211001.0,Teresina,22131159.0,CETI PEDRA MOLE,Estadual,41.7,30.60,31.10,78.7,-,0.39,259.57,260.92,4.24,1.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12077,PE,2611606.0,Recife,26124297.0,COLEGIO DE APLICACAO DO CE DA UFPE,Federal,98.9,96.70,100.00,100.0,-,0.99,402.96,362.42,7.77,7.7
12078,MG,3105608.0,Barbacena,31014630.0,ESCOLA PREPARATORIA DE CADETES DO AR,Federal,99.8,99.40,100.00,100.0,-,1.00,407.47,353.17,7.70,7.7
12079,,,,,,,,,,,,,,,,
12080,,,,,,,,,,,,,,,,


## AutoML Configuration

TODO: Explain why you chose the automl settings and cofiguration you used below.

In [10]:
azureml.train.automl.utilities.get_primary_metrics("regression")

['normalized_mean_absolute_error',
 'normalized_root_mean_squared_error',
 'spearman_correlation',
 'r2_score']

In [11]:
# TODO: Put your automl settings here
automl_settings = {
     "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'r2_score'
}

# TODO: Put your automl config here
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "regression",
                             training_data=dataset,
                             label_column_name="IDEB_2019",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

In [12]:
# TODO: Submit your experiment
remote_run = experiment.submit(automl_config, show_output = True)

Running on remote.
No run_configuration provided, running on auto-ml with default configuration
Running on remote compute: auto-ml
Parent Run ID: AutoML_65fb4522-a6ef-4b28-ac48-d2ceacc4be6d

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  Each iteration of the trained model was validated through cross-validation.
              
DETAILS:      
+---------------------------------+
|Number of folds                  |
|3                                |
+---------------------------------+

********************************************************

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [14]:
from azureml.widgets import RunDetails
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  Each iteration of the trained model was validated through cross-validation.
              
DETAILS:      
+---------------------------------+
|Number of folds                  |
|3                                |
+---------------------------------+

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       DONE
DESCRIPTION:  High cardinality features were det

{'runId': 'AutoML_65fb4522-a6ef-4b28-ac48-d2ceacc4be6d',
 'target': 'auto-ml',
 'status': 'Completed',
 'startTimeUtc': '2021-01-06T02:40:42.823067Z',
 'endTimeUtc': '2021-01-06T03:24:38.474123Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'r2_score',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'auto-ml',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"9b8c3907-edd6-457a-b452-95089bae425e\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"UI/01-06-2021_015830_UTC/DATASET_IDEB_ensino_medio_escolas_2019.txt\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"RG-AZML\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"a748a023-896b-4719-8f8e-3d1ba6

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [15]:
# Get the best run object
best_model, fitted_model = remote_run.get_output()
best_model.register_model(model_name = 'best_model', model_path = '/outputs/')
print(best_model)

Run(Experiment: ideb-experiment,
Id: AutoML_65fb4522-a6ef-4b28-ac48-d2ceacc4be6d_35,
Type: azureml.scriptrun,
Status: Completed)


In [16]:
print(fitted_model)

RegressionPipeline(pipeline=Pipeline(memory=None,
                                     steps=[('datatransformer',
                                             DataTransformer(enable_dnn=None,
                                                             enable_feature_sweeping=None,
                                                             feature_sweeping_config=None,
                                                             feature_sweeping_timeout=None,
                                                             featurization_config=None,
                                                             force_text_dnn=None,
                                                             is_cross_validation=None,
                                                             is_onnx_compatible=None,
                                                             logger=None,
                                                             observer=None,
                                         


## Metric in best model

In [17]:
best_model_metrics = best_model.get_metrics()
best_model_metrics

{'root_mean_squared_log_error': 0.008212349540547198,
 'r2_score': 0.9976401339553499,
 'spearman_correlation': 0.9987625843773339,
 'median_absolute_error': 0.02621277846742953,
 'normalized_median_absolute_error': 0.004095746635535864,
 'normalized_root_mean_squared_log_error': 0.006172777902969669,
 'mean_absolute_error': 0.030087664500081625,
 'root_mean_squared_error': 0.03904249789365686,
 'mean_absolute_percentage_error': 0.759912466013537,
 'explained_variance': 0.9976431520210984,
 'normalized_mean_absolute_error': 0.004701197578137754,
 'normalized_root_mean_squared_error': 0.006100390295883884,
 'residuals': 'aml://artifactId/ExperimentRun/dcid.AutoML_65fb4522-a6ef-4b28-ac48-d2ceacc4be6d_35/residuals',
 'predicted_true': 'aml://artifactId/ExperimentRun/dcid.AutoML_65fb4522-a6ef-4b28-ac48-d2ceacc4be6d_35/predicted_true'}

In [26]:
#TODO: Save the best model
env = best_model.get_environment()
script_file = 'score.py'
best_model.download_file('outputs/model.pkl', 'outputs/automl_ideb.pkl')
best_model.download_file('outputs/scoring_file_v_1_0_0.py', 'outputs/score_aml.py')
best_model.download_file('automl_driver.py', 'outputs/automl_driver.py')
import joblib  
joblib.dump(fitted_model, "automl_ideb.pkl")

['automl_ideb.pkl']

In [29]:
os.listdir(os.curdir), os.getcwd()

(['.config',
  '.ipynb_aml_checkpoints',
  'automl-ideb',
  'automl.ipynb',
  'automl.log',
  'automl_errors.log',
  'automl_ideb.pkl',
  'azureml_automl.log',
  'outputs',
  'score.py'],
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-d2-v2/code/Users/anderson12')

## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [60]:
model=best_model.register_model(
                        model_name = 'best_automl',
                        model_path = '/outputs/',
                        description = "Best model trained with AutoML")
model

Model(workspace=Workspace.create(name='AZML', subscription_id='a748a023-896b-4719-8f8e-3d1ba62d6e35', resource_group='RG-AZML'), name=best_automl, id=best_automl:14, version=14, tags={}, properties={})

## Define Deployment


In [62]:
print(service.get_logs())


2021-01-07T00:46:30,325728095+00:00 - gunicorn/run 
2021-01-07T00:46:30,336947455+00:00 - iot-server/run 
2021-01-07T00:46:30,341015977+00:00 - rsyslog/run 
2021-01-07T00:46:30,349860424+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_8eff28b157f42edcd2424a5aae6c8074/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [78]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

inference_config = InferenceConfig(entry_script=script_file, environment=env)

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

deploy_service_name= 'deploy_service_ideb'

aci_service_name = 'service-ideb'
print(aci_service_name)
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config)


service-ideb


WebserviceException: WebserviceException:
	Message: Service service-ideb with the same name already exists, please use a different service name or delete the existing service.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service service-ideb with the same name already exists, please use a different service name or delete the existing service."
    }
}

In [76]:
aci_service.wait_for_deployment(True)
print(aci_service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...............................................................................
Failed


ERROR:azureml.core.webservice.webservice:Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 6f1191d6-2b2c-4d7a-b800-970a3f336b12
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: service-ideb. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_4f3cee89203e005745d1830c04fe722a locally. Please refer to https://aka.ms/debugimage#service-launch-fails for mor

WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 6f1191d6-2b2c-4d7a-b800-970a3f336b12
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: service-ideb. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_4f3cee89203e005745d1830c04fe722a locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: service-ideb. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_4f3cee89203e005745d1830c04fe722a locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information."
    },
    {
      "code": "AciDeploymentFailed",
      "message": "Your container application crashed. Please follow the steps to debug:\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\n3. You can also interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\n4. View the diagnostic events to check status of container, it may help you to debug the issue. {\"restartCount\":5,\"currentState\":{\"state\":\"Waiting\",\"startTime\":null,\"exitCode\":null,\"finishTime\":null,\"detailStatus\":\"CrashLoopBackOff: Back-off restarting failed\"},\"previousState\":{\"state\":\"Terminated\",\"startTime\":\"2021-01-07T15:30:52.416Z\",\"exitCode\":111,\"finishTime\":\"2021-01-07T15:31:00.194Z\",\"detailStatus\":\"Error\"},\"events\":null}"
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: 6f1191d6-2b2c-4d7a-b800-970a3f336b12\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"message\": \"Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: service-ideb. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_4f3cee89203e005745d1830c04fe722a locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: service-ideb. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\nYou can also try to run image viennaglobal.azurecr.io/azureml/azureml_4f3cee89203e005745d1830c04fe722a locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\"\n    },\n    {\n      \"code\": \"AciDeploymentFailed\",\n      \"message\": \"Your container application crashed. Please follow the steps to debug:\\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\\n3. You can also interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\n4. View the diagnostic events to check status of container, it may help you to debug the issue. {\\\"restartCount\\\":5,\\\"currentState\\\":{\\\"state\\\":\\\"Waiting\\\",\\\"startTime\\\":null,\\\"exitCode\\\":null,\\\"finishTime\\\":null,\\\"detailStatus\\\":\\\"CrashLoopBackOff: Back-off restarting failed\\\"},\\\"previousState\\\":{\\\"state\\\":\\\"Terminated\\\",\\\"startTime\\\":\\\"2021-01-07T15:30:52.416Z\\\",\\\"exitCode\\\":111,\\\"finishTime\\\":\\\"2021-01-07T15:31:00.194Z\\\",\\\"detailStatus\\\":\\\"Error\\\"},\\\"events\\\":null}\"\n    }\n  ]\n}"
    }
}

## Test the Deploy model
TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
# scoring endpoint
scoring_uri = service.scoring_uri

data = {
  "data": [
    {
      "amount_tsh": 0,
      "date_recorded": "2013-02-04 00:00:00,000000",
      "funder": "Dmdd",
      "gps_height": 1996,
      "installer": "DMDD",
      "longitude": 35.2907992,
      "latitude": -4.05969643,
      "wpt_name": "Dinamu Secondary School",
      "num_private": 0,
      "basin": "Internal",
      "subvillage": "Magoma",
      "region": "Manyara",
      "region_code": 21,
      "district_code": 3,
      "lga": "Mbulu",
      "ward": "Bashay",
      "population": 321,
      "public_meeting": "True",
      "scheme_management": "Parastatal",
      "scheme_name": "",
      "permit": "True",
      "construction_year": 2012,
      "extraction_type": "other",
      "extraction_type_group": "other",
      "extraction_type_class": "other",
      "management": "parastatal",
      "management_group": "parastatal",
      "payment": "never pay",
      "payment_type": "never pay",
      "water_quality": "soft",
      "quality_group": "good",
      "quantity": "seasonal",
      "quantity_group": "seasonal",
      "source": "rainwater harvesting",
      "source_type": "rainwater harvesting",
      "source_class": "surface",
      "waterpoint_type": "other",
      "waterpoint_type_group": "other"
    }
  ]
}
# Convert to JSON string
input_data = json.dumps(data)

# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.json())

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
logs = service.get_logs()
print(logs)
service.delete()