# Automated Machine Learning

In [2]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
 
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


In [3]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

AZML
RG-AZML
eastus
a748a023-896b-4719-8f8e-3d1ba62d6e35


In [4]:
# Choose a name for the experiment.
experiment_name = 'ideb-regression'

experiment = Experiment(ws, experiment_name)

output = {}
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Run History Name'] = experiment_name
pd.set_option('display.max_colwidth', -1)
outputDf = pd.DataFrame(data = output, index = [''])
outputDf.T

Unnamed: 0,Unnamed: 1
Subscription ID,a748a023-896b-4719-8f8e-3d1ba62d6e35
Workspace,AZML
Resource Group,RG-AZML
Location,eastus
Run History Name,ideb-regression


## Using AmlCompute

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Choose a name for your CPU cluster
cpu_cluster_name = "automl-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.

Jobrunning........
Running


## Data

### Load Data

In [6]:
datastore = ws.get_default_datastore()
path = "data/data.csv"
try:
    ideb_dataset = Dataset.get_by_name(ws, name="ideb_dataset")
except:
    datastore.upload('data', target_path='data')
    # Create TabularDataset & register in workspace
    ideb_dataset = Dataset.Tabular.from_delimited_files([(datastore, path)])
    ideb_dataset = ideb_dataset.register(
        ws, name="ideb_dataset", create_new_version=True,
        description="Dataset for ideb prediction"
    )

## Automl Settings

In [7]:
automl_settings = {
    "n_cross_validations": 5,
    "primary_metric": 'r2_score',
    "experiment_timeout_minutes": 15, 
    "max_concurrent_iterations": 4,
    "featurization": 'auto',
}

automl_config = AutoMLConfig(task = 'regression',
                             compute_target = compute_target,
                             training_data = ideb_dataset,
                             label_column_name = "IDEB_2019",
                             enable_early_stopping= True,
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )



## Run details

In [8]:
from azureml.widgets import RunDetails

remote_run = experiment.submit(automl_config)
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

Running on remote.


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_867f2963-9a49-44fb-93e9-7f304aa4d725',
 'target': 'automl-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-22T01:48:02.595449Z',
 'endTimeUtc': '2021-01-22T02:08:39.910049Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'r2_score',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'automl-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"823dab97-9480-4378-8c08-4aec261bc0f3\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"UI/01-20-2021_015249_UTC/data.CSV\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"RG-AZML\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"a748a023-896b-4719-8f8e-3d1ba62d6e35\\\\\\", \\\\\\

In [11]:
!pip install --upgrade --upgrade-strategy eager azureml-sdk==1.20.0

Requirement already up-to-date: azureml-sdk==1.20.0 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (1.20.0)
Requirement already up-to-date: azureml-pipeline~=1.20.0 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (from azureml-sdk==1.20.0) (1.20.0)
Requirement already up-to-date: azureml-train~=1.20.0 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (from azureml-sdk==1.20.0) (1.20.0)
Requirement already up-to-date: azureml-train-automl-client~=1.20.0 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (from azureml-sdk==1.20.0) (1.20.0)
Requirement already up-to-date: azureml-dataset-runtime[fuse]~=1.20.0 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (from azureml-sdk==1.20.0) (1.20.0)
Requirement already up-to-date: azureml-core~=1.20.0 in /anaconda/envs/azureml_py36/lib/python3.6/site-packages (from azureml-sdk==1.20.0) (1.20.0)
Requirement already up-to-date: azureml-pipeline-steps~=1.20.0 in /anaconda/envs/azureml_py36/lib/pytho

## Best Model

In [12]:
best_automl_run, fitted_model = remote_run.get_output()
print(best_automl_run)
print(fitted_model)

Run(Experiment: ideb-regression,
Id: AutoML_867f2963-9a49-44fb-93e9-7f304aa4d725_35,
Type: azureml.scriptrun,
Status: Completed)
RegressionPipeline(pipeline=Pipeline(memory=None,
                                     steps=[('datatransformer',
                                             DataTransformer(enable_dnn=None,
                                                             enable_feature_sweeping=None,
                                                             feature_sweeping_config=None,
                                                             feature_sweeping_timeout=None,
                                                             featurization_config=None,
                                                             force_text_dnn=None,
                                                             is_cross_validation=None,
                                                             is_onnx_compatible=None,
                                                             l

In [14]:
best_automl_run_metrics = best_automl_run.get_metrics()
best_automl_run_metrics

{'mean_absolute_error': 0.029116858816360495,
 'normalized_median_absolute_error': 0.003976936118452571,
 'normalized_root_mean_squared_log_error': 0.005984790055152333,
 'spearman_correlation': 0.9988538379347626,
 'normalized_mean_absolute_error': 0.004549509190056327,
 'r2_score': 0.997803647174114,
 'explained_variance': 0.9978046527882583,
 'normalized_root_mean_squared_error': 0.005878769186393657,
 'root_mean_squared_log_error': 0.00796224789426758,
 'root_mean_squared_error': 0.03762412279291941,
 'median_absolute_error': 0.025452391158096454,
 'mean_absolute_percentage_error': 0.7356982514360096,
 'residuals': 'aml://artifactId/ExperimentRun/dcid.AutoML_867f2963-9a49-44fb-93e9-7f304aa4d725_35/residuals',
 'predicted_true': 'aml://artifactId/ExperimentRun/dcid.AutoML_867f2963-9a49-44fb-93e9-7f304aa4d725_35/predicted_true'}

## Save the best model

In [15]:
import joblib  
joblib.dump(fitted_model, "./model.pkl")

['./model.pkl']

In [18]:
script_file = 'score.py'
best_automl_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file)

In [19]:
os.listdir(os.curdir), os.getcwd()

(['.azureml',
  '.config',
  '.ipynb_aml_checkpoints',
  '.ipynb_checkpoints',
  'automl.ipynb',
  'automl.ipynb.amltemp',
  'automl.log',
  'automl_errors.log',
  'automl_old.ipynb',
  'automl_old.ipynb.amltemp',
  'azureml_automl.log',
  'conda_dependencies.yml',
  'config.json',
  'hyperparameter_tuning.ipynb',
  'hyperparameter_tuning.ipynb.amltemp',
  'model.pkl',
  'outputs',
  'score.py',
  'train.py',
  'train.py.amltemp'],
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/automl-cluster/code/Users/anderson12')

## Environment


In [20]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

env = Environment.get(workspace=ws, name="AzureML-Tutorial")
#env.python.conda_dependencies.add_pip_package("scikit-learn")
#env = Environment.from_conda_specification(name='automl',file_path='conda_dependencies.yml')

## Model Deployment

In [24]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice


from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice

# Register the model to deploy
model = best_automl_run.register_model(model_name = "best_automl_model", model_path = "outputs/model.pkl", description = "Best model trained with AutoML")

# Combine scoring script & environment in Inference configuration
inference_config = InferenceConfig(entry_script="score.py", environment=env)

# Set deployment configuration
deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

# Define the model, inference, & deployment configuration and web service name and location to deploy
service = Model.deploy(
    workspace = ws,
    name = "ideb-service",
    models = [model],
    inference_config = inference_config,
    deployment_config = deployment_config)

service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running......................................................................................
Failed


ERROR:azureml.core.webservice.webservice:Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 0601010e-5fb5-4d7d-a157-d970d638e049
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Error in entry script, ModuleNotFoundError: No module named 'inference_schema', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, ModuleNotFoundError: No module named 'inference_schema', please run print(service.get_logs()) to get details."
    }
  ]
}



WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 0601010e-5fb5-4d7d-a157-d970d638e049
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Error in entry script, ModuleNotFoundError: No module named 'inference_schema', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, ModuleNotFoundError: No module named 'inference_schema', please run print(service.get_logs()) to get details."
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: 0601010e-5fb5-4d7d-a157-d970d638e049\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"message\": \"Aci Deployment failed with exception: Error in entry script, ModuleNotFoundError: No module named 'inference_schema', please run print(service.get_logs()) to get details.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Error in entry script, ModuleNotFoundError: No module named 'inference_schema', please run print(service.get_logs()) to get details.\"\n    }\n  ]\n}"
    }
}

In [25]:
print(service.get_logs())

2021-01-22T02:27:39,152566363+00:00 - gunicorn/run 
2021-01-22T02:27:39,152293661+00:00 - iot-server/run 
2021-01-22T02:27:39,154840576+00:00 - rsyslog/run 
2021-01-22T02:27:39,160087806+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_a1736710baabf05d37337a101fa8d6d1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a1736710baabf05d37337a101fa8d6d1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a1736710baabf05d37337a101fa8d6d1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a1736710baabf05d37337a101fa8d6d1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a1736710baabf05d37337a101fa8d6d1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

## Test the Deploy model

In [None]:
scoring_uri = service.scoring_uri

print(f'\nservice state: {service.state}\n')
print(f'scoring URI: \n{service.scoring_uri}\n')
print(f'swagger URI: \n{service.swagger_uri}\n')

print(service.scoring_uri)
print(service.swagger_uri)

In [None]:
scoring_uri='http://8d460a6f-6b7c-4e77-a2c1-5c5bb835e7ab.eastus.azurecontainer.io/score'
key=''
headers = {'Content-Type': 'application/json'}

In [None]:
import json

test_sample = json.dumps({'ideb_dataset': x[0:2].tolist()})

prediction = service.run(test_sample)

print(prediction)

## View the service logs

In [None]:
service.get_logs()


In [None]:
service.delete()