# Automated ML

Import all the dependencies

In [None]:
import logging
import os
import json
import csv
import numpy as np
import pandas as pd
import pkg_resources
import joblib

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.steps import AutoMLStep
from azureml.widgets import RunDetails
from azureml.core import Model, Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from sklearn.preprocessing import StandardScaler

# Report the version of the imported azureml.core package, so the notebook
# output records which SDK release produced the results below.
print("SDK version:", azureml.core.VERSION)

## Dataset

### Overview
This dataset is originally from the National Institute of Diabetes and Digestive and Kidney Diseases. The objective of the dataset is to diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements included in the dataset. Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage.
https://www.kaggle.com/uciml/pima-indians-diabetes-database

Objective:
Predict the “Outcome” column (whether or not the patient has diabetes) from the 8 input features.

In [None]:
# Connect to the workspace described by the saved configuration, then open
# the experiment under which every AutoML run in this notebook is recorded.
experiment_name = 'automl-experiment'
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Name under which the dataset is registered in the workspace, and the
# description stored with it on first registration.
key = "diabetes"
description_text = "Pima Indians Diabetes"

# Reuse the registered dataset when the workspace already has one under
# `key`; otherwise build it from the public CSV and register it so later
# runs of this notebook take the first branch.
found = key in ws.datasets
if found:
    dataset = ws.datasets[key]
else:
    example_data = 'https://raw.githubusercontent.com/dokiem/AZMLFinalProject/main/diabetes.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)
    dataset = dataset.register(workspace=ws,
                               name=key,
                               description=description_text)

df = dataset.to_pandas_dataframe()

# df.info() prints its report itself, whereas df.describe() only returns a
# DataFrame. A notebook renders just the value of a cell's last expression,
# so describe() has to come last or its summary is silently thrown away.
df.info()
df.describe()

In [None]:
# Show the first rows of the DataFrame as a quick sanity check of the
# loaded columns and values.
df.head()

In [None]:
# Name of the AmlCompute cluster that runs the AutoML experiment.
amlcompute_cluster_name = "cluster-kiemdv1"

# Look the cluster up by name first; ComputeTargetException is treated as
# "no such cluster", in which case a new one is provisioned.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        ws,
        amlcompute_cluster_name,
        compute_config,
    )
else:
    print('Found existing cluster, use it.')

# Runs for both the reused and the newly created cluster.
compute_target.wait_for_completion(show_output=True)

## AutoML Configuration

Overview of the automl settings and configuration used for this experiment:

In [None]:
# Run limits and the optimisation target, kept in their own dict so they can
# be adjusted without touching the AutoMLConfig call below.
automl_settings = dict(
    experiment_timeout_minutes=30,
    max_concurrent_iterations=4,
    primary_metric='accuracy',
    n_cross_validations=5,
    iterations=24,
)

# Classification task on the registered dataset, predicting the 'Outcome'
# column on the compute cluster prepared earlier.
automl_config = AutoMLConfig(
    task='classification',
    compute_target=compute_target,
    training_data=dataset,
    label_column_name='Outcome',
    featurization='auto',
    enable_early_stopping=True,
    debug_log='automl_errors.log',
    **automl_settings,
)

In [None]:
# Submit the AutoML configuration to the experiment, show the run-details
# widget for it, and wait for the run to complete while streaming its output.
automl_run = experiment.submit(automl_config, show_output=True)
run_widget = RunDetails(automl_run)
run_widget.show()
automl_run.wait_for_completion(show_output=True)

In [None]:
# Display the run-details widget for the AutoML run in its own cell.
# NOTE(review): the submit cell already shows this widget; presumably this
# cell exists to re-render it after the run has finished - confirm.
RunDetails(automl_run).show()

In [None]:
# Wait for the AutoML run to complete, streaming its output.
# NOTE(review): the submit cell makes the same call; this looks like a
# convenience for re-attaching to the run after a kernel restart - confirm.
automl_run.wait_for_completion(show_output=True)

## Best Model

The cell below retrieves the best model from the AutoML experiment and displays the metrics of the run that produced it.



In [None]:
# get_output() yields a pair: the best run of the AutoML experiment and the
# fitted model that run produced.
best_run, fitted_model = automl_run.get_output()
# As the last expression of the cell, the best run's metrics are rendered
# as the cell output.
best_run.get_metrics()

In [None]:
# Register the best run's 'outputs/model.pkl' artifact under the name
# 'automl-model'; the returned object is what the deployment cell passes
# to Model.deploy.
model = best_run.register_model(model_name='automl-model',model_path='outputs/model.pkl')

## Model Deployment

Remember that you have to deploy only one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.

Register the best model, create an inference config, and deploy the model as a web service.

In [None]:
# Copy the generated scoring script and conda environment file from the best
# run's outputs into the working directory, each under a shorter local name.
# 'scoring.py' is the entry script used by the deployment cell.
for remote_path, local_name in (
    ('outputs/scoring_file_v_1_0_0.py', 'scoring.py'),
    ('outputs/conda_env_v_1_0_0.yml', 'envFile.yml'),
):
    best_run.download_file(remote_path, local_name)

In [None]:
# Inference setup: the downloaded scoring script is the entry point and the
# environment is taken from the best run.
environment = best_run.get_environment()
inference_config = InferenceConfig(
    entry_script='scoring.py',
    environment=environment,
)

# ACI sizing: one CPU core and 1 GB of memory, with authentication and
# Application Insights both enabled.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
    enable_app_insights=True,
)

# overwrite=True is passed so an existing service with this name is replaced.
service_name = 'automl-deploy'
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

print("Scoring URI: ", service.scoring_uri)
print("Swagger link: ", service.swagger_uri)

Send a request to the web service to test it.

In [None]:
import requests
import json

# Ask the deployed service for its auth keys rather than pasting one into the
# notebook: a hard-coded key leaks a credential into source control and stops
# working as soon as the service is redeployed. get_keys() returns the
# primary and secondary key; only the primary one is needed here.
key, _secondary_key = service.get_keys()

# Two sample patients, each carrying the eight input features of the dataset.
data = {"data": [{"Pregnancies": 10,
     "Glucose": 120,
     "BloodPressure": 60,
     "SkinThickness": 30,
     "Insulin": 20,
     "BMI": 37,
     "DiabetesPedigreeFunction": 0.513,
     "Age": 35},

    {"Pregnancies": 8,
     "Glucose": 91,
     "BloodPressure": 65,
     "SkinThickness": 31,
     "Insulin": 10,
     "BMI": 29,
     "DiabetesPedigreeFunction": 0.402,
     "Age": 30},
      ],
  "method": "predict"}

# Keep a copy of the exact payload on disk so the request can be replayed.
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

headers = {'Content-Type': 'application/json'}
headers['Authorization'] = f'Bearer {key}'

# Bound the wait so an unresponsive endpoint cannot hang the notebook, and
# surface HTTP errors (e.g. 401/5xx) directly instead of failing later while
# trying to parse an error page as JSON.
resp = requests.post(service.scoring_uri, input_data, headers=headers, timeout=60)
resp.raise_for_status()
print(resp.json())
print("(Note: 0: Not Diabetes, Case 1: Diabetes)")

Print the logs of the web service and delete the service.

In [None]:
# Fetch the logs of the deployed web service and show them as the cell output.
logs = service.get_logs()
logs

In [None]:
# Delete the deployed web service now that testing is finished.
service.delete()