# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

[Wine Quality Data Set](https://archive.ics.uci.edu/ml/datasets/Wine+Quality)

In [None]:
import json
import logging
import os
import pickle
from pprint import pprint

import joblib
import pandas as pd
import requests

from azureml.core import Environment
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.compute import ComputeTarget
from azureml.core.compute.amlcompute import AmlCompute
from azureml.core.dataset import Dataset
from azureml.core.datastore import Datastore
from azureml.core.experiment import Experiment
from azureml.core.model import InferenceConfig
from azureml.core.model import Model
from azureml.core.run import Run
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.workspace import Workspace
from azureml.data import TabularDataset
from azureml.exceptions import ComputeTargetException
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import TrainingOutput
from azureml.pipeline.core.pipeline import Pipeline
from azureml.pipeline.core.run import PipelineRun
from azureml.pipeline.steps.automl_step import AutoMLStep
from azureml.train.automl.automlconfig import AutoMLConfig
from azureml.widgets.run_details import RunDetails

In [None]:
# --- Project-wide constants -------------------------------------------------

# Local working folder and AutoML debug log file.
CAPSTONE_FOLDER = "capstone-ml"
CAPSTONE_DEBUG_LOG = "capstone-ml.log"

# Name of the target column passed to AutoML.
# NOTE(review): confirm 'y' is really the label column of the wine CSV.
CAPSTONE_LABEL_COLUMN_NAME = "y"

CAPSTONE_AUTOMLSTEP_NAME = "AutoML Training Step"

# Experiment names.
CAPSTONE_EXPERIMENT_NAME_AUTOML = "exp-capstone-automl"
CAPSTONE_EXPERIMENT_NAME_STEP7 = "exp-capstone-step7"

# External data source and the name/description it is registered under.
CAPSTONE_TABULAR_WINE_DATA = (
    "https://automlsamplenotebookdata.blob.core.windows.net"
    "/automl-sample-notebook-data/wine_train.csv"
)
CAPSTONE_DATASET_NAME = "White Wine Data"
CAPSTONE_DATASET_DESCRIPTION = "Wine data - does the wine taste good?"

# Model registration.
CAPSTONE_DEPLOYED_MODEL_NAME = "wine-taste-automl"
CAPSTONE_DEPLOYED_MODEL_PATH = "./outputs/best_automl.pkl"

# Pipeline data / output names.
CAPSTONE_PIPELINEDATA_METRICS_NAME = "PipelineData_Metrics"
CAPSTONE_PIPELINEDATA_MODEL_NAME = "PipelineData_Model"
CAPSTONE_PIPELINE_OUTPUT_METRICS_NAME = "Pipeline Metrics Output"
CAPSTONE_PIPELINE_OUTPUT_MODEL_NAME = "Pipeline Model Output"
CAPSTONE_PIPELINE_DESCRIPTION = "AutoML Pipeline to train model on the wine data"
CAPSTONE_EXPERIMENT_NAME = "AutoML Train Wine Data Experiment"

# Name of the deployed web service.
CAPSTONE_ENV_SERVICE = "capstone-env-service"

# Published pipeline metadata.
CAPSTONE_PUBLISHED_PIPELINE_NAME = "Wine Data Training Pipeline"
CAPSTONE_PUBLISHED_PIPELINE_DESCRIPTION = "This pipeline trains on the Wine Data"
CAPSTONE_PUBLISHED_PIPELINE_VERSION = "1.0"

CAPSTONE_CONSUME_PIPELINE_ENDPOINT_EXPERIMENT = "exp-run-pipeline"

# Compute cluster name and curated environment used for inference.
CC_NAME = "CPU-CC"
CURATED_ENV_NAME = "AzureML-Tutorial"

## Dataset

### Overview
This project trains a machine learning model on white wine data.
The task is a classification problem: determine whether a wine tastes good or not.


TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [None]:
# Load the workspace from the local config file and open the experiment
# that the AutoML run will be submitted under.
ws = Workspace.from_config()
experiment_name = CAPSTONE_EXPERIMENT_NAME_AUTOML
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Reuse the compute cluster named CC_NAME when it exists; otherwise create it.
# ComputeTarget(...) raises ComputeTargetException when no cluster of that
# name is found in the workspace, which is the signal to provision one.
try:
    cc = ComputeTarget(workspace=ws, name=CC_NAME)
    print('Compute Cluster target exists and we have a handle to the same')
except ComputeTargetException:
    # Describe the cluster, then ask the workspace to provision it.
    cc_cfg = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS12_v2',
        min_nodes=1,
        max_nodes=6,
    )
    cc = ComputeTarget.create(workspace=ws, name=CC_NAME, provisioning_configuration=cc_cfg)

# Either way we now hold a compute target; block until provisioning finishes.
# show_output is a boolean flag (the original passed the string 'True').
cc.wait_for_completion(show_output=True)

In [None]:
# Fetch the wine data as a TabularDataset, reusing the registered copy when
# one already exists so repeated runs do not download the CSV again.
data_uri = CAPSTONE_TABULAR_WINE_DATA
ds_name = CAPSTONE_DATASET_NAME

# Keep the name -> dataset mapping itself: a .keys() view supports the
# membership test but cannot be indexed by name.
dsets = ws.datasets

if ds_name in dsets:
    # Dataset is already registered in the workspace.
    proj_ds = dsets[ds_name]
else:
    # Not registered yet: build it from the external CSV and register it.
    proj_ds = Dataset.Tabular.from_delimited_files(data_uri)
    proj_ds = proj_ds.register(workspace=ws, name=ds_name, description=CAPSTONE_DATASET_DESCRIPTION)

# Take a peek at the data by converting it to a pandas DataFrame.
proj_df = proj_ds.to_pandas_dataframe()

# Display the data (last expression of the cell).
proj_df

In [None]:
# Display pandas' summary statistics for the loaded wine DataFrame.
proj_df.describe()

## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration you used below.

In [None]:
# AutoML settings: budget/limits for the search plus the metric to optimise.
# `logging` must be imported at the top of the notebook for `verbosity`.
automl_settings = {
    "iterations": 20,
    "experiment_timeout_minutes": 30,
    "enable_early_stopping": True,
    "iteration_timeout_minutes": 5,
    "max_concurrent_iterations": 5,
    "max_cores_per_iteration": -1,
    "n_cross_validations": 3,
    "primary_metric": 'AUC_weighted',
    "verbosity": logging.INFO,
}

# AutoML configuration: a classification task on the registered dataset,
# executed on the compute cluster. No separate validation dataset is
# supplied; the settings above request 3-fold cross-validation instead.
automl_config = AutoMLConfig(
    compute_target=cc,
    task='classification',
    training_data=proj_ds,
    label_column_name=CAPSTONE_LABEL_COLUMN_NAME,
    path=CAPSTONE_FOLDER,
    featurization='auto',
    model_explainability=True,
    debug_log=CAPSTONE_DEBUG_LOG,
    **automl_settings,
)

In [None]:
# Submit the AutoML configuration to the experiment; the returned run handle
# is used by the following cells.
remote_run = experiment.submit(config=automl_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [None]:
# Build the RunDetails widget for the submitted run and display it.
_run_details_widget = RunDetails(remote_run)
_run_details_widget.show()

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [None]:
def print_model(model, prefix=""):
    """Recursively print the steps of a pipeline-like model.

    Each entry of ``model.steps`` is read as ``(name, component)``.
    The name is printed with ``prefix`` prepended, then:

    * a component with ``estimators`` and ``weights`` has its member names
      and weights pretty-printed, and every member is printed recursively;
    * a component with ``_base_learners`` and ``_meta_learner`` has its meta
      learner pretty-printed, and every base learner is printed recursively;
    * any other component has ``get_params()`` pretty-printed.

    Args:
        model: Object exposing a ``steps`` sequence of ``(name, component)``.
        prefix: Text prepended to each step name; recursion passes
            ``"<member name> - "``.
    """
    for step in model.steps:
        label, component = step[0], step[1]
        print(prefix + label)

        if hasattr(component, 'estimators') and hasattr(component, 'weights'):
            summary = {
                'estimators': [member[0] for member in component.estimators],
                'weights': component.weights,
            }
            pprint(summary)
            print()
            members = component.estimators
        elif hasattr(component, '_base_learners') and hasattr(component, '_meta_learner'):
            print("\nMeta Learner")
            pprint(component._meta_learner)
            print()
            members = component._base_learners
        else:
            # Leaf step: dump its parameters; nothing to recurse into.
            pprint(component.get_params())
            print()
            continue

        # Descend into each nested (name, model) pair.
        for member in members:
            print_model(member[1], member[0] + ' - ')

In [None]:
# Retrieve the best child run and its fitted model from the AutoML run
# submitted earlier (`remote_run`), then fetch that run's logged metrics.
automl_best_run, automl_best_model = remote_run.get_output()
automl_best_run_metrics = automl_best_run.get_metrics()

print(f'********** Best AutoML accuracy: {automl_best_run_metrics.get("accuracy")}')
print(f'********** printing Best AutoML run:\n{automl_best_run}\n\nPrinting model:')

# Walk the fitted pipeline and print every step and its parameters.
print_model(automl_best_model)

In [None]:
# Save the best model locally. Create the target folder first so the dump
# does not fail when ./outputs does not exist yet.
os.makedirs('./outputs', exist_ok=True)
joblib.dump(automl_best_model, './outputs/automl_model.joblib')

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [None]:
# Refer - https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
#
# Tutorial: Deploy an image classification model in Azure Container Instances -
# https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-models-with-aml

# Register the best run's model. Pass the path constant itself, not its
# quoted name.
# NOTE(review): model_path must exist among the files uploaded by the run;
# AutoML best runs normally store the model at 'outputs/model.pkl' - confirm
# that CAPSTONE_DEPLOYED_MODEL_PATH matches.
model = automl_best_run.register_model(
    model_name=CAPSTONE_DEPLOYED_MODEL_NAME,
    model_path=CAPSTONE_DEPLOYED_MODEL_PATH,
)

# Inference runs in the curated environment together with score.py.
curated_env_name = CURATED_ENV_NAME
curated_env = Environment.get(workspace=ws, name=curated_env_name)

inference_config = InferenceConfig(
    environment=curated_env,
    entry_script="score.py",
)

service_name = CAPSTONE_ENV_SERVICE

# Key-based auth is enabled because the test cells call service.get_keys()
# and send a Bearer token.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, auth_enabled=True)

# Deploy the *registered* Model object, not the in-memory fitted estimator.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)



TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
# To enable Application Insights on the web service:
# * first access the endpoint using the name assigned at deployment time
# * then update the web service with enable_app_insights=True

# The service was deployed under CAPSTONE_ENV_SERVICE; the registered model
# name (CAPSTONE_DEPLOYED_MODEL_NAME) is not a web service name.
proj_webservice = Webservice(
    workspace=ws,
    name=CAPSTONE_ENV_SERVICE,
)

proj_webservice.update(
    enable_app_insights=True,
)

# At this point Application Insights (logging) is enabled and can be
# checked in the Azure ML studio UI.

In [None]:
# URL for the web service, should be similar to:
# 'http://8530a665-66f3-49c8-a953-b82a2d312917.eastus.azurecontainer.io/score'
#
# From the tail end of the code at
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-and-where?tabs=python
# - Deploy machine learning models to Azure

# Look the service up by the constant's value, not the literal string
# "CAPSTONE_ENV_SERVICE".
service = Webservice(workspace=ws, name=CAPSTONE_ENV_SERVICE)
scoring_uri = service.scoring_uri

# Set the appropriate headers
headers = {"Content-Type": "application/json"}

# Keys are only available when the service was deployed with auth enabled;
# get_keys() fails otherwise, so guard the call.
key = None
if service.auth_enabled:
    key, _ = service.get_keys()
    headers["Authorization"] = f"Bearer {key}"

# Make the request and display the response and logs.
# NOTE(review): this payload is the placeholder from the documentation
# sample; the wine scoring script may reject it - confirm it is still wanted.
data = {
    "query": "What color is the fox",
    "context": "The quick brown fox jumped over the lazy dog.",
}
data = json.dumps(data)
resp = requests.post(scoring_uri, data=data, headers=headers)
print(resp.text)


# Sample rows taken from the data (kept as a comment: raw text is not valid
# Python inside a code cell). Columns, in order:
#   fixed acidity, volatile acidity, citric acid, residual sugar, chlorides,
#   free sulfur dioxide, total sulfur dioxide, density, pH, sulphates,
#   alcohol, quality
#
# 0.883090875   0.3150853064  -0.5304215055  -0.1166025484  -0.447289012   -0.7237011554  -0.6908704601  -0.01249670459  1.004852702     0.4394546089    0.3947056997   0
# 0.7645889612  1.307202455   -0.8609459206   1.657825186    0.3765862299  -0.4297069397   0.8386109571   1.655893566   -0.05474573919   0.001341709573  -0.6616718988  0
# 0.883090875   0.3150853064  -0.5304215055  -0.1166025484  -0.447289012   -0.7237011554  -0.6908704601  -0.01249670459  1.004852702     0.4394546089    0.3947056997   0

# Two sets of data to score, so we get two results back.
# All features are sent as numbers (the original mixed strings and floats),
# and the second record's alcohol value now matches its source row.
data = {
    "data": [
        {
            "fixed acidity": 0.883090875,
            "volatile acidity": 0.3150853064,
            "citric acid": -0.5304215055,
            "residual sugar": -0.1166025484,
            "chlorides": -0.447289012,
            "free sulfur dioxide": -0.7237011554,
            "total sulfur dioxide": -0.6908704601,
            "density": -0.01249670459,
            "pH": 1.004852702,
            "sulphates": 0.4394546089,
            "alcohol": 0.3947056997,
        },
        {
            "fixed acidity": 0.7645889612,
            "volatile acidity": 1.307202455,
            "citric acid": -0.8609459206,
            "residual sugar": 1.657825186,
            "chlorides": 0.3765862299,
            "free sulfur dioxide": -0.4297069397,
            "total sulfur dioxide": 0.8386109571,
            "density": 1.655893566,
            "pH": -0.05474573919,
            "sulphates": 0.001341709573,
            "alcohol": -0.6616718988,
        },
    ]
}

# Convert to a JSON string and keep a copy on disk.
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# `key` is set by the service lookup earlier in this cell; only send the
# authorization header when a key is actually available.
if key:
    headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response
resp = requests.post(scoring_uri, data=input_data, headers=headers)
print(resp.json())

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Fetch the web service logs and print them one line at a time.
logs = proj_webservice.get_logs()
print(*logs.split('\n'), sep='\n')



In [None]:
# Clean up: delete the web service first, then the compute cluster.
for _resource in (proj_webservice, cc):
    _resource.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
