# Automated Machine Learning

In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
 

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.data.dataset_factory import TabularDatasetFactory

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


In [2]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

AZML
RG-AZML
eastus
a748a023-896b-4719-8f8e-3d1ba62d6e35


In [3]:
# Choose a name for the experiment.
experiment_name = 'ideb-regression'

experiment = Experiment(ws, experiment_name)

output = {}
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Run History Name'] = experiment_name
pd.set_option('display.max_colwidth', -1)
outputDf = pd.DataFrame(data = output, index = [''])
outputDf.T

Unnamed: 0,Unnamed: 1
Subscription ID,a748a023-896b-4719-8f8e-3d1ba62d6e35
Workspace,AZML
Resource Group,RG-AZML
Location,eastus
Run History Name,ideb-regression


## Using AmlCompute

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Choose a name for your CPU cluster
cpu_cluster_name = "automl-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.

Jobrunning...................................................................................................................................................................................................................................................................
Running


## Data

### Load Data

In [7]:
datastore = ws.get_default_datastore()
path = "data/data.csv"
try:
    ideb_dataset = Dataset.get_by_name(ws, name="ideb_dataset")
except:
    datastore.upload('data', target_path='data')
    # Create TabularDataset & register in workspace
    ideb_dataset = Dataset.Tabular.from_delimited_files([(datastore, path)])
    ideb_dataset = ideb_dataset.register(
        ws, name="ideb_dataset", create_new_version=True,
        description="Dataset for ideb prediction"
    )

## Automl Settings

In [8]:
automl_settings = {
    "n_cross_validations": 3,
    "primary_metric": 'r2_score',
    "experiment_timeout_hours": 0.6, 
    "max_concurrent_iterations": 4,
    "featurization": 'auto',
    "verbosity": logging.INFO,
}

automl_config = AutoMLConfig(task = 'regression',
                             compute_target = compute_target,
                             training_data = ideb_dataset,
                             label_column_name = "IDEB_2019",
                             **automl_settings
                            )



## Result

In [9]:
from azureml.widgets import RunDetails

remote_run = experiment.submit(automl_config, show_output = True)
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

Running on remote.
No run_configuration provided, running on automl-cluster with default configuration
Running on remote compute: automl-cluster
Parent Run ID: AutoML_1cf7d1e4-eb24-4b07-b562-79b16f8596d0

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more about high ca

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…



****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and no high cardinality features were detected.
              Learn more about high cardinality feature handling: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary de

{'runId': 'AutoML_1cf7d1e4-eb24-4b07-b562-79b16f8596d0',
 'target': 'automl-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-25T22:04:05.492107Z',
 'endTimeUtc': '2021-01-25T22:46:25.240685Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'r2_score',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '3',
  'target': 'automl-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"823dab97-9480-4378-8c08-4aec261bc0f3\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"UI/01-20-2021_015249_UTC/data.CSV\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"RG-AZML\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"a748a023-896b-4719-8f8e-3d1ba62d6e35\\\\\\", \\\\\\

## Create an environment

In [11]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.2
- pip=20.2.4
- pip:
  - azureml-core==1.20.0
  - azureml-pipeline-core==1.20.0
  - azureml-telemetry==1.20.0
  - azureml-defaults==1.20.0
  - azureml-interpret==1.20.0
  - azureml-automl-core==1.20.0
  - azureml-automl-runtime==1.20.0
  - azureml-train-automl-client==1.20.0
  - azureml-train-automl-runtime==1.20.0.post1
  - azureml-dataset-runtime==1.20.0
  - inference-schema
  - py-cpuinfo==5.0.0
  - boto3==1.15.18
  - botocore==1.18.18
- numpy~=1.18.0
- scikit-learn==0.22.1
- pandas~=0.25.0
- fbprophet==0.5
- holidays==0.9.11
- setuptools-git
- psutil>5.0.0,<6.0.0


Overwriting conda_dependencies.yml


In [12]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

env = Environment.get(workspace=ws, name="AzureML-AutoML")
#env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')


## Best Model

In [13]:
best_run, fitted_model = remote_run.get_output()
print(best_run)
print(fitted_model)

Run(Experiment: ideb-regression,
Id: AutoML_1cf7d1e4-eb24-4b07-b562-79b16f8596d0_104,
Type: azureml.scriptrun,
Status: Completed)
RegressionPipeline(pipeline=Pipeline(memory=None,
                                     steps=[('datatransformer',
                                             DataTransformer(enable_dnn=None,
                                                             enable_feature_sweeping=None,
                                                             feature_sweeping_config=None,
                                                             feature_sweeping_timeout=None,
                                                             featurization_config=None,
                                                             force_text_dnn=None,
                                                             is_cross_validation=None,
                                                             is_onnx_compatible=None,
                                                             

## Metric in best model

In [14]:
best_run_metrics = best_run.get_metrics()
best_run_metrics

{'normalized_root_mean_squared_log_error': 0.005884586821424402,
 'normalized_median_absolute_error': 0.00396045975066051,
 'mean_absolute_percentage_error': 0.7338530438282841,
 'root_mean_squared_error': 0.037063558987341656,
 'normalized_mean_absolute_error': 0.0045446443324167534,
 'root_mean_squared_log_error': 0.007828936119017896,
 'normalized_root_mean_squared_error': 0.005791181091772134,
 'median_absolute_error': 0.025346942404227264,
 'spearman_correlation': 0.9988622462491957,
 'explained_variance': 0.9978734918257425,
 'mean_absolute_error': 0.029085723727467223,
 'r2_score': 0.9978726380803238,
 'predicted_true': 'aml://artifactId/ExperimentRun/dcid.AutoML_1cf7d1e4-eb24-4b07-b562-79b16f8596d0_104/predicted_true',
 'residuals': 'aml://artifactId/ExperimentRun/dcid.AutoML_1cf7d1e4-eb24-4b07-b562-79b16f8596d0_104/residuals'}

## Save the best model

In [15]:
best_run.get_file_names ()

['automl_driver.py',
 'azureml-logs/55_azureml-execution-tvmps_959b442c5a0ffa1d4b1ab577b7180775caf83e201c43664cdc30520d8654fe8c_d.txt',
 'azureml-logs/65_job_prep-tvmps_959b442c5a0ffa1d4b1ab577b7180775caf83e201c43664cdc30520d8654fe8c_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_959b442c5a0ffa1d4b1ab577b7180775caf83e201c43664cdc30520d8654fe8c_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'explanation/487970a9/eval_data_viz.interpret.json',
 'explanation/487970a9/expected_values.interpret.json',
 'explanation/487970a9/features.interpret.json',
 'explanation/487970a9/global_names/0.interpret.json',
 'explanation/487970a9/global_rank/0.interpret.json',
 'explanation/487970a9/global_values/0.interpret.json',
 'explanation/487970a9/local_importance_values.interpret.json',
 'explanation/487970a9/rich_metadata.interpret.json',
 'explanation/487970a9/visualization_dict.interpret.json',
 'explanation/487970a9/ys_pred_viz.interpret

In [16]:
best_run.download_file('./outputs/model.pkl', './outputs/automl_ideb.pkl')
best_run.download_file('./outputs/scoring_file_v_1_0_0.py', './score.py')
best_run.download_file('automl_driver.py', './outputs/automl_driver.py')



In [18]:
os.listdir(os.curdir), os.getcwd()

(['.azureml',
  '.config',
  '.ipynb_aml_checkpoints',
  '.ipynb_checkpoints',
  'automl.ipynb',
  'automl.ipynb.amltemp',
  'automl.log',
  'automl_errors.log',
  'automl_old.ipynb.amltemp',
  'azureml_automl.log',
  'conda_dependencies.yml',
  'config.json',
  'hyperparameter_tuning.ipynb',
  'hyperparameter_tuning.ipynb.amltemp',
  'outputs',
  'score.py',
  'score.py.amltemp',
  'train.py',
  'train.py.amltemp'],
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/automl-cluster/code/Users/anderson12')

## Model Deployment

In [19]:
model=best_run.register_model(
                        model_name = 'best_model',
                        model_path = './outputs/model.pkl',
                        description = "Best model trained with AutoML")
model

Model(workspace=Workspace.create(name='AZML', subscription_id='a748a023-896b-4719-8f8e-3d1ba62d6e35', resource_group='RG-AZML'), name=best_model, id=best_model:20, version=20, tags={}, properties={})

In [20]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
inference_config = InferenceConfig(entry_script='./score.py', environment=env)

service = model.deploy(
    workspace=ws,
    name="ideb-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True
)
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running....................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


## Test the Deploy model

In [21]:
best_run.get_file_names ()

['automl_driver.py',
 'azureml-logs/55_azureml-execution-tvmps_959b442c5a0ffa1d4b1ab577b7180775caf83e201c43664cdc30520d8654fe8c_d.txt',
 'azureml-logs/65_job_prep-tvmps_959b442c5a0ffa1d4b1ab577b7180775caf83e201c43664cdc30520d8654fe8c_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_959b442c5a0ffa1d4b1ab577b7180775caf83e201c43664cdc30520d8654fe8c_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'explanation/487970a9/eval_data_viz.interpret.json',
 'explanation/487970a9/expected_values.interpret.json',
 'explanation/487970a9/features.interpret.json',
 'explanation/487970a9/global_names/0.interpret.json',
 'explanation/487970a9/global_rank/0.interpret.json',
 'explanation/487970a9/global_values/0.interpret.json',
 'explanation/487970a9/local_importance_values.interpret.json',
 'explanation/487970a9/rich_metadata.interpret.json',
 'explanation/487970a9/visualization_dict.interpret.json',
 'explanation/487970a9/ys_pred_viz.interpret

In [22]:
scoring_uri = service.scoring_uri

print(f'\nservice state: {service.state}\n')
print(f'scoring URI: \n{service.scoring_uri}\n')
print(f'swagger URI: \n{service.swagger_uri}\n')


service state: Healthy

scoring URI: 
http://b48b3b5a-4612-4df8-ba00-817d0358f6e9.eastus.azurecontainer.io/score

swagger URI: 
http://b48b3b5a-4612-4df8-ba00-817d0358f6e9.eastus.azurecontainer.io/swagger.json



In [23]:
import requests
import json

# scoring endpoint
scoring_uri = service.scoring_uri


# If the service is authenticated, set the key or token
#key = '<your key or token>'

# Two sets of data to score, so we get two results back
data = {"data":
        [
          {
           "TAprov2019_Total": 99.9, 
           "TAprov2019_1_serie": 99.2, 
           "TAprov2019_2_serie": 59.1, 
           "TAprov2019_3_serie": 60.5, 
           "TAprov2019_4_serie": 70.5, 
           "Indicador_Rendimento": 0.99, 
           "SAEB2019_Matematica": 365.38, 
           "SAEB2019_Lingua Portuguesa": 351.54, 
           "SAEB2019_Nota Media Padronizada": 7.055853
          },
      ]
    }
# Convert to JSON string
input_data = json.dumps(data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
#headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)

"{\"result\": [6.903481911249511]}"


## View the service logs

In [24]:
print(service.get_logs())

2021-01-26T02:57:40,716292919+00:00 - iot-server/run 
2021-01-26T02:57:40,717473498+00:00 - gunicorn/run 
2021-01-26T02:57:40,717618608+00:00 - rsyslog/run 
2021-01-26T02:57:40,721289154+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_265db83b0c6014ce472c5de2f0b97e04/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

In [23]:
service.delete()