# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

[Wine Quality Data Set](https://archive.ics.uci.edu/ml/datasets/Wine+Quality)

In [None]:
import json
import logging
import os
import pickle
from pprint import pprint

import joblib
import pandas as pd
import requests

from azureml.core import Environment
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import ComputeTarget
from azureml.core.compute.amlcompute import AmlCompute
from azureml.core.dataset import Dataset
from azureml.core.datastore import Datastore
from azureml.core.experiment import Experiment
from azureml.core.model import InferenceConfig
from azureml.core.model import Model
from azureml.core.run import Run
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.workspace import Workspace
from azureml.data import TabularDataset
from azureml.exceptions import ComputeTargetException
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import TrainingOutput
from azureml.pipeline.core.pipeline import Pipeline
from azureml.pipeline.core.run import PipelineRun
from azureml.pipeline.steps.automl_step import AutoMLStep
from azureml.train.automl.automlconfig import AutoMLConfig
from azureml.widgets.run_details import RunDetails


In [None]:
# ---------------------------------------------------------------------------
# Notebook-wide constants, grouped by what they name.
# ---------------------------------------------------------------------------

# Local AutoML project folder, AutoML debug log and the label column
CAPSTONE_FOLDER = "capstone-ml"
CAPSTONE_DEBUG_LOG = "capstone-ml.log"
CAPSTONE_LABEL_COLUMN_NAME = "quality"

# Experiment names
CAPSTONE_EXPERIMENT_NAME_AUTOML = "exp-capstone-automl"
CAPSTONE_EXPERIMENT_NAME_STEP7 = "exp-capstone-step7"
CAPSTONE_EXPERIMENT_NAME = "AutoML Train Wine Data Experiment"
CAPSTONE_CONSUME_PIPELINE_ENDPOINT_EXPERIMENT = "exp-run-pipeline"

# Source data (UCI white-wine CSV) and the name/description it is registered under
CAPSTONE_TABULAR_WINE_DATA = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-white.csv"
)
CAPSTONE_DATASET_NAME = "White Wine Data"
CAPSTONE_DATASET_DESCRIPTION = "Wine data - does the wine taste good?"

# Normalized training-data file names
TRAIN_DATA_DIR = "train_norm_data"
TRAIN_DATA_FILE = "train_norm.csv"
TRAIN_NORM_INFO_FILE = "norm.csv"

# AutoML model: saving, registration and deployment
CAPSTONE_DEPLOYED_MODEL_NAME = "wine-taste-automl"
CAPSTONE_DEPLOYED_MODEL_PATH = "./outputs/best_automl.pkl"
CAPSTONE_MODEL_DESCRIPTION = "AutoML Registered Model"
CAPSTONE_SOURCE_DIRECTORY = "./source_dir"
CAPSTONE_SCORING_SCRIPT = "score.py"
CAPSTONE_ENV_SERVICE = "capstone-env-service"
CURATED_ENV_NAME = "AzureML-Tutorial"

# Pipeline / AutoMLStep names (not referenced by the cells in this notebook)
CAPSTONE_AUTOMLSTEP_NAME = "AutoML Training Step"
CAPSTONE_PIPELINEDATA_METRICS_NAME = "PipelineData_Metrics"
CAPSTONE_PIPELINEDATA_MODEL_NAME = "PipelineData_Model"
CAPSTONE_PIPELINE_OUTPUT_METRICS_NAME = "Pipeline Metrics Output"
CAPSTONE_PIPELINE_OUTPUT_MODEL_NAME = "Pipeline Model Output"
CAPSTONE_PIPELINE_DESCRIPTION = "AutoML Pipeline to train model on the wine data"
CAPSTONE_PUBLISHED_PIPELINE_NAME = "Wine Data Training Pipeline"
CAPSTONE_PUBLISHED_PIPELINE_DESCRIPTION = "This pipeline trains on the Wine Data"
CAPSTONE_PUBLISHED_PIPELINE_VERSION = "1.0"

# Compute clusters
CC_AUTOML = "CPU-CC-AUTOML"    # CPU compute cluster for AutoML
CC_HYPERML = "CPU-CC-HyperML"  # CPU compute cluster for HyperML

# Constants for HyperML
CAPSTONE_EXPERIMENT_NAME_HYPER = "exp-capstone-hyper"
CAPSTONE_DEPLOYED_HYPER_MODEL_NAME = "wine-taste-hyper"
CAPSTONE_DEPLOYED_HYPER_MODEL_PATH = "outputs/best_run_hyperdrive.pkl"

## Dataset

### Overview
This project predicts the quality of white wine from its measured properties (acidity, sugar, pH, alcohol, ...).
The task is a binary classification: is the wine tasty or not?


TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [None]:
# Connect to the workspace described by the local configuration
ws = Workspace.from_config()

# All AutoML runs of this notebook are collected under this experiment
experiment_name = CAPSTONE_EXPERIMENT_NAME_AUTOML
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Reuse the AutoML compute cluster when it already exists; otherwise create it.
#
# Looking the cluster up by name raises ComputeTargetException when the lookup
# fails, which is taken to mean that no cluster called CC_AUTOML has been
# created yet.
try:
    cc = ComputeTarget(workspace=ws, name=CC_AUTOML)
    print('Compute Cluster target exists and we have a handle to the same')
except ComputeTargetException:
    # Describe the cluster to provision, then request its creation
    cc_cfg = AmlCompute.provisioning_configuration(vm_size='Standard_DS12_v2', min_nodes=1, max_nodes=6)
    cc = ComputeTarget.create(workspace=ws, name=CC_AUTOML, provisioning_configuration=cc_cfg)

# Either way we now hold the compute cluster object; wait for provisioning to
# complete.  show_output is a boolean flag: the string 'True' used previously
# only worked because any non-empty string is truthy.
cc.wait_for_completion(show_output=True)

InProgress....
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded......................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [None]:
# Show the datasets registered in the workspace.  The lookup is done here so
# that this cell also works when the notebook is run top-to-bottom (the cell
# below is otherwise the first place where dsets gets assigned).
dsets = ws.datasets.keys()
dsets

KeysView({'White Wine Data': DatasetRegistration(id='c7751451-9733-4eba-baaa-cef06b1f2e62', name='White Wine Data', version=1, description='Wine data - does the wine taste good?', tags={})})

In [None]:
# Obtain the project dataset: reuse the registered dataset when the workspace
# already has one under CAPSTONE_DATASET_NAME, otherwise build it from the
# source CSV and register it.
data_uri = CAPSTONE_TABULAR_WINE_DATA

ds_name = CAPSTONE_DATASET_NAME
dsets = ws.datasets.keys()

if ds_name in dsets:
    # Dataset exists - no download needed
    proj_ds = ws.datasets[ds_name]
else:
    # NOTE(review): the outputs recorded in this notebook show standardized
    # features and a 0/1 quality label, which the raw CSV does not contain, so
    # the registered dataset was presumably prepared elsewhere and this branch
    # yields different data - confirm before training on it.
    print('STOP!!!!')
    # Dataset not found, so it must be created.
    # The UCI wine-quality CSV files use ';' as the field delimiter; with the
    # default ',' separator every row would be parsed as one single column.
    proj_ds = Dataset.Tabular.from_delimited_files(data_uri, separator=';')
    # Register the dataset so that repeated runs do not have to fetch the data every time
    proj_ds = proj_ds.register(workspace=ws, name=ds_name, description=CAPSTONE_DATASET_DESCRIPTION)

# Take a peek at the data by converting it to a pandas DataFrame
proj_df = proj_ds.to_pandas_dataframe()

# Display the data
proj_df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,2.186612,0.315085,3.270609,1.569104,-0.081122,-0.488506,1.238629,2.040392,-1.511694,3.418622,-1.474270,0
1,-0.538932,-0.478608,-0.034635,-1.033390,-0.081122,-1.164692,1.862187,-0.677847,0.938628,-0.436771,0.394706,0
2,1.357099,0.513509,0.130627,0.711464,-0.493060,-0.782500,-0.714401,0.181424,-0.452095,-1.137752,0.882265,0
3,-1.842453,1.902473,-1.935150,-1.003816,-0.447289,-1.429287,-1.514437,-1.072376,1.932001,0.439455,0.557225,0
4,-0.301928,-0.180973,1.865881,1.776120,0.330815,-0.194512,0.062105,1.361668,-0.518320,0.001342,-0.986711,0
...,...,...,...,...,...,...,...,...,...,...,...,...
3913,0.764589,1.406414,-1.191470,0.100272,-0.218435,1.569454,0.273879,0.351941,0.276379,-1.050129,-0.417892,0
3914,1.475600,-1.569937,0.378521,-1.082680,-0.538831,0.040684,-1.232072,-1.259610,-2.571292,0.527077,1.207304,0
3915,2.068110,0.910356,3.022716,1.135355,-0.309976,-0.841299,0.462123,1.428538,-1.246794,2.104284,-0.986711,0
3916,-1.486947,0.414297,-0.034635,0.198851,-0.401518,-0.606103,-1.020297,-0.450491,0.408829,0.001342,0.475966,0


In [None]:
# Per-column summary statistics (count, mean, std, min, quartiles, max)
proj_df.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0,3918.0
mean,-0.006188,-0.005162,-0.003316,0.009996,0.006599,-0.0086,-0.003108,0.005267,-0.003091,0.001632,0.00366,0.215926
std,1.000857,1.002506,0.991018,1.002611,1.034684,0.999488,0.993988,1.007274,0.998251,0.99855,1.005135,0.411516
min,-3.619982,-1.966784,-2.761461,-1.141827,-1.683102,-1.899678,-3.043919,-2.312802,-3.101091,-2.364468,-2.043089,0.0
25%,-0.657434,-0.677032,-0.530422,-0.924953,-0.447289,-0.723701,-0.714401,-0.76812,-0.65077,-0.699639,-0.824192,0.0
50%,-0.064924,-0.180973,-0.117266,-0.215182,-0.126893,-0.076914,-0.102608,-0.079366,-0.054746,-0.086281,-0.092853,0.0
75%,0.527585,0.414297,0.378521,0.706535,0.193503,0.569873,0.673898,0.72641,0.607503,0.527077,0.719745,0.0
max,8.704217,8.152811,10.955302,11.712916,13.741673,14.916791,7.09772,15.029763,4.183648,4.995829,2.87313,1.0


## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration you used below.

In [None]:
# AutoML settings: run limits, parallelism, validation scheme and the metric
# to optimise.  They are forwarded to AutoMLConfig as keyword arguments.
automl_settings = dict(
    iterations=20,
    experiment_timeout_minutes=30,
    enable_early_stopping=True,
    iteration_timeout_minutes=5,
    max_concurrent_iterations=5,
    max_cores_per_iteration=-1,
    n_cross_validations=3,
    primary_metric='AUC_weighted',
    verbosity=logging.INFO,
)

# AutoML configuration: a classification task trained on the registered
# dataset using the compute cluster.  No separate validation dataset is
# supplied; n_cross_validations in the settings above is used instead.
automl_config = AutoMLConfig(
    task='classification',
    compute_target=cc,
    training_data=proj_ds,
    label_column_name=CAPSTONE_LABEL_COLUMN_NAME,
    featurization='auto',
    model_explainability=True,
    path=CAPSTONE_FOLDER,
    debug_log=CAPSTONE_DEBUG_LOG,
    **automl_settings,
)

In [None]:
# Submit the AutoML configuration to the experiment; remote_run is the handle
# used by the following cells to monitor the run and fetch its results.
remote_run = experiment.submit(config=automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
exp-capstone-automl,AutoML_f270b76e-c5be-4fc5-b8e0-7ae67ebd55e6,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [None]:
# Show the run-details widget for the submitted AutoML run
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [None]:
def print_model(model, prefix=""):
    """Print every step of a fitted pipeline, descending into ensembles.

    For each ``(name, component)`` entry of ``model.steps`` the step name is
    printed (preceded by ``prefix``) followed by a description of the
    component:

    * a component exposing ``estimators`` and ``weights`` is reported as the
      list of member names with their weights, and each member pipeline is
      then printed recursively;
    * a component exposing ``_base_learners`` and ``_meta_learner`` is
      reported as its meta learner, and each base-learner pipeline is then
      printed recursively;
    * any other component is reported through its ``get_params()``.

    Args:
        model: Object with a ``steps`` sequence of ``(name, component)`` items.
        prefix: Text printed in front of each step name of this pipeline.
            Recursive calls use ``"<member name> - "`` as the prefix.
    """
    for step in model.steps:
        step_name, component = step[0], step[1]
        print(prefix + step_name)

        # Weighted ensemble of named member pipelines
        if hasattr(component, 'estimators') and hasattr(component, 'weights'):
            members = component.estimators
            pprint({
                'estimators': [member[0] for member in members],
                'weights': component.weights,
            })
            print()
            for member in members:
                print_model(member[1], member[0] + ' - ')
            continue

        # Ensemble made of base learners plus a meta learner
        if hasattr(component, '_base_learners') and hasattr(component, '_meta_learner'):
            print("\nMeta Learner")
            pprint(component._meta_learner)
            print()
            for learner in component._base_learners:
                print_model(learner[1], learner[0] + ' - ')
            continue

        # Plain step: show its parameters
        pprint(component.get_params())
        print()

In [None]:
# Fetch the best child run together with its fitted model, then report the
# run's accuracy, the run itself and the structure of the model pipeline.
automl_best_run, automl_best_model = remote_run.get_output()
automl_best_run_metrics = automl_best_run.get_metrics()

best_accuracy = automl_best_run_metrics.get("accuracy")
print(f'********** Best AutoML accuracy: {best_accuracy}')
print(f'********** printing Best AutoML run:\n{automl_best_run}\n\nPrinting model:')

print_model(automl_best_model)

********** Best AutoML accuracy: 0.8705972434915773
********** printing Best AutoML run:
Run(Experiment: exp-capstone-automl,
Id: AutoML_f270b76e-c5be-4fc5-b8e0-7ae67ebd55e6_18,
Type: azureml.scriptrun,
Status: Completed)

Printing model:
datatransformer
{'enable_dnn': False,
 'enable_feature_sweeping': True,
 'feature_sweeping_config': {},
 'feature_sweeping_timeout': 86400,
 'featurization_config': None,
 'force_text_dnn': False,
 'is_cross_validation': True,
 'is_onnx_compatible': False,
 'observer': None,
 'task': 'classification',
 'working_dir': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/notebook161266/code/Users/odl_user_161266'}

prefittedsoftvotingclassifier
{'estimators': ['5', '8', '13', '0', '12', '11', '10'],
 'weights': [0.2857142857142857,
             0.07142857142857142,
             0.14285714285714285,
             0.07142857142857142,
             0.21428571428571427,
             0.14285714285714285,
             0.07142857142857142]}

5 - standardscalerwrapp

In [None]:
# Metrics recorded on the parent AutoML run
automl_run_metrics = remote_run.get_metrics()
print(automl_run_metrics)

{'experiment_status': ['DatasetEvaluation', 'FeaturesGeneration', 'DatasetFeaturization', 'DatasetFeaturizationCompleted', 'DatasetCrossValidationSplit', 'ModelSelection', 'BestRunExplainModel', 'ModelExplanationDataSetSetup', 'PickSurrogateModel', 'EngineeredFeatureExplanations', 'EngineeredFeatureExplanations', 'RawFeaturesExplanations'], 'experiment_status_description': ['Gathering dataset statistics.', 'Generating features for the dataset.', 'Beginning to fit featurizers and featurize the dataset.', 'Completed fit featurizers and featurizing the dataset.', 'Generating individually featurized CV splits.', 'Beginning model selection.', 'Best run model explanations started', 'Model explanations data setup completed', 'Choosing LightGBM as the surrogate model for explanations', 'Computation of engineered features started', 'Computation of engineered features completed', 'Computation of raw features started'], 'precision_score_weighted': 0.8663922340392869, 'weighted_accuracy': 0.937198

In [None]:
# Save the best model to CAPSTONE_DEPLOYED_MODEL_PATH.
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first ('.' covers a path without a directory part).
os.makedirs(os.path.dirname(CAPSTONE_DEPLOYED_MODEL_PATH) or '.', exist_ok=True)
joblib.dump(automl_best_model, CAPSTONE_DEPLOYED_MODEL_PATH)

['./outputs/automl_model.joblib']

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Register the best AutoML model and deploy it as an ACI web service.
#
# Refer - https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
# Tutorial: Deploy an image classification model in Azure Container Instances -
# https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-models-with-aml

# Register the model
registered_model = remote_run.register_model(description=CAPSTONE_MODEL_DESCRIPTION)
print(remote_run.model_id)
print(f'{registered_model.name}  {registered_model.id}  {registered_model.version}')

# Environment in which the scoring script will run
curated_env = Environment.get(workspace=ws, name=CURATED_ENV_NAME)

# Inference configuration: scoring script + environment.
# NOTE: CAPSTONE_SCORING_SCRIPT has to exist; the error recorded for this cell
# ("entry_script score.py doesn't exist") means it could not be found.
# NOTE(review): presumably it is expected inside CAPSTONE_SOURCE_DIRECTORY -
# confirm where the SDK version in use resolves entry_script from.
inference_config = InferenceConfig(
    environment=curated_env,
    source_directory=CAPSTONE_SOURCE_DIRECTORY,
    entry_script=CAPSTONE_SCORING_SCRIPT,
)

# auth_enabled=True because the request cell below fetches the service keys
# and sends one of them as a Bearer token.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, auth_enabled=True)

# Deploy under CAPSTONE_ENV_SERVICE (replacing an existing service of that
# name) and block until the deployment has finished.
service = Model.deploy(workspace=ws,
                       name=CAPSTONE_ENV_SERVICE,
                       models=[registered_model],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)
service.wait_for_deployment(show_output=True)



ERROR:azureml._model_management._util:entry_script score.py doesn't exist. entry_script should be path relative to current working directory



AutoMLf270b76ec18
AutoMLf270b76ec18  AutoMLf270b76ec18:1  1


WebserviceException: WebserviceException:
	Message: entry_script score.py doesn't exist. entry_script should be path relative to current working directory
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "entry_script score.py doesn't exist. entry_script should be path relative to current working directory"
    }
}

TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
# To enable Application Insights on the web service:
# * first access the endpoint using the name assigned at the time of deployment
# * next update web service parameters such as enable_app_insights
#
# The service was deployed under CAPSTONE_ENV_SERVICE, so that is the name to
# look up (CAPSTONE_DEPLOYED_MODEL_NAME is a different value and is not the
# name the deployment cell gave the service).
proj_webservice = Webservice(
    workspace=ws,
    name=CAPSTONE_ENV_SERVICE,
)

proj_webservice.update(
    enable_app_insights=True
)

# At this point Application Insights (logging) is enabled and can be
# checked in the GUI in Azure ML studio

In [None]:
# Send two sample rows to the deployed web service and print its answer.
#
# URL for the web service, should be similar to:
# 'http://8530a665-66f3-49c8-a953-b82a2d312917.eastus.azurecontainer.io/score'
#
# Based on the tail end of the code at
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
# - Deploy machine learning models to Azure

# Look the service up by the value of CAPSTONE_ENV_SERVICE.  The quoted text
# "CAPSTONE_ENV_SERVICE" would search for a service literally named that.
service = Webservice(workspace=ws, name=CAPSTONE_ENV_SERVICE)
scoring_uri = service.scoring_uri

# Request headers; the Authorization header is only added when key-based
# authentication is enabled on the service, since only then are keys available.
headers = {'Content-Type': 'application/json'}
if getattr(service, 'auth_enabled', False):
    key, _ = service.get_keys()
    headers['Authorization'] = f'Bearer {key}'

# Sample rows, in column order:
# fixed acidity, volatile acidity, citric acid, residual sugar, chlorides,
# free sulfur dioxide, total sulfur dioxide, density, pH, sulphates, alcohol
# (both source rows have quality 0)
#
# 0.883090875   0.3150853064  -0.5304215055  -0.1166025484  -0.447289012   -0.7237011554  -0.6908704601  -0.01249670459   1.004852702    0.4394546089    0.3947056997
# 0.7645889612  1.307202455   -0.8609459206   1.657825186    0.3765862299  -0.4297069397   0.8386109571   1.655893566    -0.05474573919  0.001341709573 -0.6616718988
#
# Two sets of data to score, so we get two results back.
# NOTE(review): rows are sent as positional lists; whether the service instead
# expects one {column name: value} dict per row depends on the scoring
# script - TODO confirm.
data = {
    "data": [
        [
            0.883090875,
            0.3150853064,
            -0.5304215055,
            -0.1166025484,
            -0.447289012,
            -0.7237011554,
            -0.6908704601,
            -0.01249670459,
            1.004852702,
            0.4394546089,
            0.3947056997,
        ],
        [
            0.7645889612,
            1.307202455,
            -0.8609459206,
            1.657825186,
            0.3765862299,
            -0.4297069397,
            0.8386109571,
            1.655893566,
            -0.05474573919,
            0.001341709573,
            # alcohol of the second source row (previously the first row's
            # value had been repeated here)
            -0.6616718988,
        ],
    ]
}

# Convert to JSON string
input_data = json.dumps(data)

# Make the request and display the response; the timeout keeps the cell from
# hanging forever when the endpoint does not answer.
resp = requests.post(scoring_uri, input_data, headers=headers, timeout=60)
print(resp.json())

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Fetch the web service logs and print them one line per output line
logs = proj_webservice.get_logs()

print(*logs.split('\n'), sep='\n')



In [None]:
# Clean up: delete the web service first, then the compute cluster
for resource in (proj_webservice, cc):
    resource.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
