# Model training with Automated ML

In the cell below, import all the dependencies that you will need to complete the project.

## Imports

In [1]:
# Imports
import pandas as pd

In [2]:
# Azure ML Imports
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace, Experiment
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
import azureml.core

## Workspace

In [3]:
# Report the installed SDK version, then connect to the workspace described by
# the local configuration file.
print("SDK version:", azureml.core.VERSION)
ws = Workspace.from_config()

# One line per workspace property, printed as a single newline-separated block.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

SDK version: 1.20.0
Workspace name: quick-starts-ws-135760
Azure region: southcentralus
Subscription id: 9b72f9e6-56c5-4c16-991b-19c652994860
Resource group: aml-quickstarts-135760


## Compute

Create a remote GPU compute cluster for model training

In [4]:
# Name of the GPU cluster used for training
gpu_cluster_name = "gpu-cluster"

try:
    # Look the cluster up by name; this raises when no such compute target exists
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
except ComputeTargetException:
    # No cluster with that name yet: provision one with up to 10 nodes
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC24',
        max_nodes=10,
    )
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports its provisioning state
gpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

### Overview
TODO: In this markdown cell, give an overview of the dataset you are using. Also mention the task you will be performing.


TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [5]:


# Create a TabularDataset for Sign Language MNIST.
# Data is available at:
# "https://www.kaggle.com/datamunge/sign-language-mnist"
# The training CSV itself is read from a GitHub copy (see datastore_path).

key = "sign-language-mnist"
description_text = "sign Language MNIST"

# True when a dataset with this name is already registered in the workspace
found = key in ws.datasets.keys()

if found:
    # Reuse the registered dataset
    ds = ws.datasets[key]
else:
    datastore_path = "https://github.com/emanbuc/ASL-Recognition-Deep-Learning/raw/main/datasets/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv"
    # Build the dataset from the CSV (with header handling enabled) and
    # register it in the workspace so later runs can reuse it
    ds = TabularDatasetFactory.from_delimited_files(
        path=datastore_path,
        header=True,
    ).register(workspace=ws, name=key, description=description_text)

# Materialise as a pandas DataFrame and show summary statistics
df = ds.to_pandas_dataframe()
df.describe()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
count,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,...,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0,27455.0
mean,12.318813,145.419377,148.500273,151.247714,153.546531,156.210891,158.411255,160.472154,162.339683,163.954799,...,141.104863,147.495611,153.325806,159.125332,161.969259,162.736696,162.906137,161.966454,161.137898,159.824731
std,7.287552,41.358555,39.942152,39.056286,38.595247,37.111165,36.125579,35.016392,33.661998,32.651607,...,63.751194,65.512894,64.427412,63.708507,63.738316,63.444008,63.50921,63.298721,63.610415,64.396846
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,121.0,126.0,130.0,133.0,137.0,140.0,142.0,144.0,146.0,...,92.0,96.0,103.0,112.0,120.0,125.0,128.0,128.0,128.0,125.5
50%,13.0,150.0,153.0,156.0,158.0,160.0,162.0,164.0,165.0,166.0,...,144.0,162.0,172.0,180.0,183.0,184.0,184.0,182.0,182.0,182.0
75%,19.0,174.0,176.0,178.0,179.0,181.0,182.0,183.0,184.0,185.0,...,196.0,202.0,205.0,207.0,208.0,207.0,207.0,206.0,204.0,204.0
max,24.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,...,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0


## AutoML Configuration

**max_concurrent_iterations** : 10

AmlCompute clusters support one iteration running per node. For multiple AutoML experiment parent runs executed in parallel on a single AmlCompute cluster, the sum of the max_concurrent_iterations values for all experiments should be less than or equal to the maximum number of nodes. Otherwise, runs will be queued until nodes are available.
Set to 10 to match the number of nodes in the compute cluster.

**iteration_timeout_minutes** : 10

Maximum time in minutes that each iteration can run for before it terminates. Set to 10 minutes to avoid the lab timeout.

**experiment_timeout_hours**: 1.1

Experiment must end before lab timeout. 
AutoML requires the experiment timeout to be longer than 60 minutes when the input data size (rows × columns = 24709 × 784 = 19,371,856 values) exceeds 10,000,000, so 1.1 hours is the minimum allowed value here.

**enable_early_stopping**: true

Whether to enable early termination if the score is not improving in the short term. Set to True to avoid wasting time. We don't need to try every possible iteration in this demo experiment.

**enable_onnx_compatible_models**: True

Whether to enable or disable enforcing the ONNX-compatible models. Must be True to enable deployment on the ONNX runtime.




In [6]:
# Run-level limits and switches for the AutoML experiment (values explained in
# the markdown section above).
automl_settings = dict(
    experiment_timeout_hours=1.1,
    # Optional early exit on a target score, currently disabled:
    # experiment_exit_score=0.999,
    enable_early_stopping=True,
    iteration_timeout_minutes=10,
    max_concurrent_iterations=10,   # same as max_nodes of the compute cluster
    enable_onnx_compatible_models=True,
)

# Classification on the registered dataset: the 'label' column is the target
# and accuracy is the metric being optimised. Runs on the GPU cluster.
automl_config = AutoMLConfig(
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='label',
    compute_target=gpu_cluster,
    debug_log='automl_errors.log',
    **automl_settings
)

In [7]:
# Submit AutoML Experiment
# The experiment object groups runs under `experiment_name` in the workspace;
# submitting the AutoML configuration returns the run handle used by the cells below.
experiment_name = 'ASL-DeepLearning-AutoML'
exp_automl = Experiment(workspace=ws, name=experiment_name)
automl_run = exp_automl.submit(automl_config)

Running on remote.


## Run Details

In [8]:
# Show the run-monitoring widget, then block until the AutoML run finishes while
# streaming its status messages into the cell output.
RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…


Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Train-Test data split
STATUS:       DONE
DESCRIPTION:  Your input data has been split into a training dataset and a holdout test dataset for validation of the model. The test holdout dataset reflects the original distribution of your input data.
              
DETAILS:      
+---------------------------------+---------------------------------+---------------------------------+
|Dataset                          |Row counts                       |Percentage            

{'runId': 'AutoML_cd06aae9-6dd8-4873-8b72-18ca4aa2d20a',
 'target': 'gpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-23T09:54:08.635748Z',
 'endTimeUtc': '2021-01-23T10:39:53.475178Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'gpu-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"e46c2b0c-52cb-46ac-a915-6630b7add088\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://github.com/emanbuc/ASL-Recognition-Deep-Learning/raw/main/datasets/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, 

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [9]:
# Retrieve the best child run and its fitted model from the AutoML parent run.
best_automl_run, auto_ml_fitted_model = automl_run.get_output()

In [11]:
# `best_automl_run` and `auto_ml_fitted_model` are already set by the preceding
# cell, so get_output() is not called a second time here.
print(best_automl_run)

Run(Experiment: ASL-DeepLearning-AutoML,
Id: AutoML_cd06aae9-6dd8-4873-8b72-18ca4aa2d20a_9,
Type: azureml.scriptrun,
Status: Completed)
Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('MaxAbsScaler', MaxAbsScaler(copy=True)),
                ('LogisticRegression',
                 LogisticRegression(C=339.3221771895323, class_weight=None,
                                    dual=False, fit_intercept=True,
                                    intercept_scaling=1, l1_ratio=N

In [None]:
# Print the fitted model returned by get_output() to inspect its steps and hyperparameters.
print(auto_ml_fitted_model)

In [None]:
# Inspect the last step of the fitted pipeline (the actual model).
# Fixes two problems in the previous version of this cell: it referenced the
# undefined name `aml_fitted_model` (the variable is `auto_ml_fitted_model`),
# and it indexed step 1, which is the scaler in the pipeline printed above.
final_estimator = auto_ml_fitted_model.steps[-1][1]
# If the winning model exposes an `estimators` attribute (presumably ensemble
# models do -- verify), show it; otherwise show the estimator itself.
getattr(final_estimator, "estimators", final_estimator)

In [10]:
# Metrics logged by the best run (the same call works for any other run object)
best_run_metrics = best_automl_run.get_metrics()

# One "name value" pair per line
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

recall_score_macro 1.0
precision_score_weighted 1.0
f1_score_weighted 1.0
accuracy 1.0
recall_score_weighted 1.0
precision_score_macro 1.0
f1_score_macro 1.0
log_loss 0.002074962476828893
precision_score_micro 1.0
matthews_correlation 1.0
weighted_accuracy 1.0
f1_score_micro 1.0
norm_macro_recall 1.0
recall_score_micro 1.0
AUC_macro 1.0
AUC_weighted 1.0
balanced_accuracy 1.0
AUC_micro 1.0
average_precision_score_micro 1.0
average_precision_score_weighted 1.0
average_precision_score_macro 1.0
accuracy_table aml://artifactId/ExperimentRun/dcid.AutoML_cd06aae9-6dd8-4873-8b72-18ca4aa2d20a_9/accuracy_table
confusion_matrix aml://artifactId/ExperimentRun/dcid.AutoML_cd06aae9-6dd8-4873-8b72-18ca4aa2d20a_9/confusion_matrix


## Model Deployment

Remember you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [16]:
# Register the best run's serialized model file (outputs/model.pkl) in the
# workspace under the name 'automl-model'; the returned object is deployed below.
model = best_automl_run.register_model(model_name='automl-model', model_path='outputs/model.pkl')

In [22]:
# List the files stored with the best run (logs, metric artifacts and the outputs/ folder).
best_automl_run.get_file_names()

['accuracy_table',
 'automl_driver.py',
 'azureml-logs/55_azureml-execution-tvmps_8a48c432fd03a09b26859f5a5f6e222af05e9187788860e376a25845181e0d3d_d.txt',
 'azureml-logs/65_job_prep-tvmps_8a48c432fd03a09b26859f5a5f6e222af05e9187788860e376a25845181e0d3d_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_8a48c432fd03a09b26859f5a5f6e222af05e9187788860e376a25845181e0d3d_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'confusion_matrix',
 'logs/azureml/98_azureml.log',
 'logs/azureml/azureml_automl.log',
 'logs/azureml/dataprep/python_span_5c0430ee-18b8-45f4-82dd-ff9b32ff5f64.jsonl',
 'logs/azureml/dataprep/python_span_674bed08-d1ad-43ad-a485-579f9c28bbb2.jsonl',
 'logs/azureml/dataprep/python_span_c5a943b7-da92-43dc-b015-f4c47ffd4e84.jsonl',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/conda_env_v_1_0_0.yml',
 'outputs/env_dependencies.json',
 'outputs/model.pkl',
 'outputs/pipeline_graph.

In [24]:
# Download the best run's files to the local working directory.
# NOTE(review): the inference config below reads ./outputs/scoring_file_v_1_0_0.py,
# presumably provided by this download -- confirm (the file listing above is truncated).
best_automl_run.download_files()



In [30]:
# Inference configuration: scoring entry script + execution environment
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig, Model

# Work on a private clone ("my_env") of the "AzureML-AutoML" environment so that
# extra packages can be added to it.
env = Environment.get(ws, "AzureML-AutoML").clone("my_env")

# Additional pip packages layered on top of the cloned environment
extra_pip_packages = ["scikit-learn"]
for pip_package in extra_pip_packages:
    env.python.conda_dependencies.add_pip_package(pip_package)

# The entry script is read from the local ./outputs folder
inference_config = InferenceConfig(
    entry_script='./outputs/scoring_file_v_1_0_0.py',
    environment=env,
)

In [32]:
from azureml.core.webservice import AciWebservice, AksWebservice, LocalWebservice, Webservice
from azureml.core.model import InferenceConfig, Model

# Remote target: Azure Container Instance with 1 CPU core and 1 GB of memory
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Alternative for a local compute target:
#deployment_config = LocalWebservice.deploy_configuration(port=8890)

# Deploy the registered model as the web service "asl-automl-004" and wait
# until the deployment finishes before reporting the service state.
service = Model.deploy(
    ws,
    "asl-automl-004",
    [model],
    inference_config,
    deployment_config,
)
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running............................................................................................................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


TODO: In the cell below, send a request to the web service you deployed to test it.

In [41]:
import pickle
import pandas as pd
import numpy as np

# Test split of the dataset, read from the GitHub copy.
# Local alternative:
#test_data = pd.read_csv("../datasets/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv")
test_data = pd.read_csv("https://github.com/emanbuc/ASL-Recognition-Deep-Learning/raw/main/datasets/sign-language-mnist/sign_mnist_test.csv")
print(f"test_data: {test_data.shape}")

# Rows 1..9, columns 1..784: column 0 is left out here and is read as the
# label in the next cell.
X_test = test_data.iloc[1:10, 1:785]
print(f"X_test: {X_test.shape}")

test_data: (7172, 785)
X_test: (9, 784)


In [42]:
# Ground-truth labels (column 0) for the same rows that were selected into X_test
actualLabels = test_data.iloc[1:10, 0]

# One {column: value} dict per row, keyed by the row's index label
testdict = X_test.to_dict(orient="index")

# Payload rows in index order 1..9
inputList = tuple(testdict[row_label] for row_label in range(1, 10))

In [38]:
import requests
import json

# Request body: the rows to score are sent under the "data" key
data = {"data": inputList}
# Convert to JSON string (done once; reused for the file copy and the request)
input_data = json.dumps(data)

# Keep a copy of the exact request body on disk
with open("data.json", "w") as _f:
    _f.write(input_data)

# Take the URL from the service deployed above rather than hard-coding it:
# a pasted URL goes stale every time the service is redeployed.
scoring_uri = service.scoring_uri
# If the service is authenticated, set the key or token
# (note: this reuses the name `key` that the dataset cell uses for the dataset name)
key = ''

# Set the content type
headers = {'Content-Type': 'application/json'}
# Only send an Authorization header when a key/token was actually provided
if key:
    headers['Authorization'] = f'Bearer {key}'

# Make the request; the timeout prevents the cell from hanging indefinitely
resp = requests.post(scoring_uri, input_data, headers=headers, timeout=60)
# Surface HTTP errors explicitly instead of failing later while parsing the body
resp.raise_for_status()
print("predicted labels: ")
print(resp.json())

predicted labels: 
{"result": [5, 10, 0, 20, 21, 15, 14, 20, 7]}


In [39]:
# Show the ground-truth labels of the rows that were sent to the service, for
# comparison with the predicted labels printed above.
print("actual labels: ")
print(actualLabels)

actual labels: 
1     5
2    10
3     0
4     3
5    21
6    10
7    14
8     3
9     7
Name: label, dtype: int64


TODO: In the cell below, print the logs of the web service and delete the service

In [40]:
# Print the web service logs. print() renders the line breaks, whereas a bare
# expression would display the string repr with escaped newlines.
print(service.get_logs())



In [43]:
# Clean up: delete the deployed web service.
service.delete()

In [44]:
# Clean up: delete the GPU compute cluster created at the top of the notebook.
gpu_cluster.delete()

Current provisioning state of AmlCompute is "Deleting"

