# Automated ML



In [1]:
import logging 
import os
import csv

from matplotlib import pyplot as plt 
import numpy as np
import pandas as pd

import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Report the Azure ML SDK version this notebook is running against.
print(f"SDK Version: {azureml.core.VERSION}")

SDK Version: 1.18.0


## Dataset

### Overview

This dataset contains house sale prices for King County, which includes Seattle. It includes homes sold between May 2014 and May 2015.

It's a great dataset for evaluating regression models that predict house prices.



In [2]:
# Name under which the runs of this notebook are grouped.
experiment_name = 'Avinash-AutoML'

# Connect to the workspace via Workspace.from_config(), then bind the
# experiment to it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code RAT3SVXRK to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse or, failing that, to create.
aml_compute_cluster = "Avinash-Test"

try:
    # Reuse the cluster when one with this name already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=aml_compute_cluster)
    print('Found Exisitng cluster, use it ')
except ComputeTargetException:
    # No cluster with that name: provision one. The provisioning configuration
    # must be passed to ComputeTarget.create; without it the VM size and node
    # limit chosen here are never applied and the call cannot succeed.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, aml_compute_cluster, compute_config)

# Wait for the compute target, streaming progress into the cell output.
compute_target.wait_for_completion(show_output=True)

Found Exisitng cluster, use it 
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
found = False

key = "house_sales"
description_text = "house sales prediction dataset"

# Look the dataset up among those registered in the workspace.
if key in ws.datasets.keys():
    found = True
    dataset = ws.datasets[key]

# Fail with an actionable message instead of a NameError on `dataset` below
# when the dataset has not been registered yet.
if not found:
    raise LookupError(
        f"Dataset '{key}' ({description_text}) is not registered in workspace "
        f"'{ws.name}'. Register it before running this notebook."
    )

# Materialise the dataset as a pandas DataFrame and show summary statistics.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,id,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
count,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0,21613.0
mean,4580302000.0,540088.1,3.370842,2.114757,2079.899736,15106.97,1.494309,0.007542,0.234303,3.40943,7.656873,1788.390691,291.509045,1971.005136,84.402258,98077.939805,47.560053,-122.213896,1986.552492,12768.455652
std,2876566000.0,367127.2,0.930062,0.770163,918.440897,41420.51,0.539989,0.086517,0.766318,0.650743,1.175459,828.090978,442.575043,29.373411,401.67924,53.505026,0.138564,0.140828,685.391304,27304.179631
min,1000102.0,75000.0,0.0,0.0,290.0,520.0,1.0,0.0,0.0,1.0,1.0,290.0,0.0,1900.0,0.0,98001.0,47.1559,-122.519,399.0,651.0
25%,2123049000.0,321950.0,3.0,1.75,1427.0,5040.0,1.0,0.0,0.0,3.0,7.0,1190.0,0.0,1951.0,0.0,98033.0,47.471,-122.328,1490.0,5100.0
50%,3904930000.0,450000.0,3.0,2.25,1910.0,7618.0,1.5,0.0,0.0,3.0,7.0,1560.0,0.0,1975.0,0.0,98065.0,47.5718,-122.23,1840.0,7620.0
75%,7308900000.0,645000.0,4.0,2.5,2550.0,10688.0,2.0,0.0,0.0,4.0,8.0,2210.0,560.0,1997.0,0.0,98118.0,47.678,-122.125,2360.0,10083.0
max,9900000000.0,7700000.0,33.0,8.0,13540.0,1651359.0,3.5,1.0,4.0,5.0,13.0,9410.0,4820.0,2015.0,2015.0,98199.0,47.7776,-121.315,6210.0,871200.0


In [23]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable across runs.
x_train, x_test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

## AutoML Configuration



In [6]:
# Settings forwarded to AutoMLConfig: time budgets, early stopping, the
# primary metric, featurization mode, log verbosity and the number of
# cross-validation folds.
automl_settings = dict(
    iteration_timeout_minutes=10,
    experiment_timeout_hours=0.3,
    enable_early_stopping=True,
    primary_metric='normalized_mean_absolute_error',
    featurization='auto',
    verbosity=logging.INFO,
    n_cross_validations=5,
)

# Regression task on the training split with "price" as the label column.
# No compute target is passed here.
automl_config = AutoMLConfig(
    task='regression',
    debug_log='automl_reg_errors.log',
    training_data=x_train,
    label_column_name="price",
    **automl_settings,
)

## Run Details



In [7]:
from azureml.widgets import RunDetails

# Submit the AutoML configuration to the experiment with output shown in the cell.
remote_run = experiment.submit(automl_config, show_output=True)

# Show the run-details widget, query the run status, then wait for completion;
# the value returned by wait_for_completion() is the cell's output.
run_widget = RunDetails(remote_run)
run_widget.show()
remote_run.get_status()
remote_run.wait_for_completion()


Running on local machine
Parent Run ID: AutoML_d8b08989-fcef-4c17-9747-bbd0f27cd685

Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed fit featurizers and featurizing the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardi

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_d8b08989-fcef-4c17-9747-bbd0f27cd685',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-12-05T17:36:49.658282Z',
 'endTimeUtc': '2020-12-05T17:56:30.062102Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'normalized_mean_absolute_error',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'local',
  'DataPrepJsonString': None,
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'regression',
  'dependencies_versions': '{"azureml-widgets": "1.18.0", "azureml-train": "1.18.0", "azureml-train-restclients-hyperdrive": "1.18.0", "azureml-train-core": "1.18.0", "azureml-train-automl": "1.18.0", "azureml-train-automl-runtime": "1.18.0", "azureml-train-automl-client": "1.18.0", "azureml-tensorboard": "1.18.0", "azureml-telemetry": "1.18.0", "azureml-sdk": "1.18.0", "azureml-s

## Best Model





In [8]:
# get_output() is unpacked into the best child run and its fitted model.
best_run, fitted_model = remote_run.get_output()

# Print the run summary followed by the steps of the fitted pipeline.
print(best_run, fitted_model.steps, sep="\n")


Run(Experiment: Avinash-AutoML,
Id: AutoML_d8b08989-fcef-4c17-9747-bbd0f27cd685_25,
Type: None,
Status: Completed)
[('datatransformer', DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                feature_sweeping_config=None, feature_sweeping_timeout=None,
                featurization_config=None, force_text_dnn=None,
                is_cross_validation=None, is_onnx_compatible=None, logger=None,
                observer=None, task=None, working_dir=None)), ('prefittedsoftvotingregressor', PreFittedSoftVotingRegressor(estimators=[('0',
                                          Pipeline(memory=None,
                                                   steps=[('maxabsscaler',
                                                           MaxAbsScaler(copy=True)),
                                                          ('lightgbmregressor',
                                                           LightGBMRegressor(boosting_type='gbdt',
                                   

In [9]:
import joblib

# Persist the fitted model to "housing_sales.pkl" (relative path).
joblib.dump(value=fitted_model, filename="housing_sales.pkl")


In [18]:
# Register the best run's './outputs' path as model 'house_sales1'.
model = best_run.register_model(
    model_name='house_sales1',
    model_path='./outputs',
)

## Model Deployment

Remember that you only have to deploy one of the two models you trained. Perform the steps in the rest of this notebook only if you wish to deploy this model.



In [21]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

from azureml.core.environment import Environment
from azureml.automl.core.shared import constants
# Download the conda specification stored with the best run and build the
# inference environment from it.
conda_spec_path = 'myenv.yml'
best_run.download_file(constants.CONDA_ENV_FILE_PATH, conda_spec_path)
myenv = Environment.from_conda_specification(name="myenv", file_path=conda_spec_path)

# Pair the scoring entry script with that environment.
inference_config = InferenceConfig(
    entry_script="scoring.py",
    environment=myenv,
)

# ACI deployment settings: 1 CPU core, 1 GB of memory, authentication enabled.
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
    tags={'name': 'housing sales'},
    description='housing sales model',
)

# Deploy the registered model as the 'house-sale' web service.
service = Model.deploy(
    workspace=ws,
    name='house-sale',
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
)

# Wait for the deployment with output shown, then report the service state.
service.wait_for_deployment(show_output=True)
print(service.state)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


Calling the Model Endpoint 

In [39]:
import json

# Split the held-out frame into label and features without mutating x_test,
# so this cell works on a fresh kernel and can be re-run. Previously y_test
# was only defined in a commented-out line, and x_test still carried the
# "price" label.
# NOTE(review): assumes scoring.py expects feature columns only — confirm.
y_test = x_test["price"]
x_test_features = x_test.drop(columns=["price"])

# Serialise the first five rows as a list of records under the "data" key.
x_json = x_test_features[:5].to_json(orient='records')
parsed = json.loads(x_json)
test_samples = json.dumps({"data": parsed})

# Send the payload to the deployed web service.
result = service.run(input_data=test_samples)

print("predicted model output: ", result)
print("correct Price of the house :", y_test[0:5])

predicted model output:  [ 401975.47936735  959191.34284196 1089646.772514   1584312.94014468
  687234.46923464]
correct Price of the house : 735      365000.00
2830     865000.00
4106    1038000.00
16218   1490000.00
19964    711000.00
Name: price, dtype: float64


Printing the Logs and deleting the service

In [36]:
# Print the logs rather than echoing the bare expression: the notebook would
# otherwise show the repr of one long string with literal "\n" sequences.
print(service.get_logs())


'2020-12-05T19:24:09,159945800+00:00 - gunicorn/run \n2020-12-05T19:24:09,167508155+00:00 - nginx/run \n2020-12-05T19:24:09,170467777+00:00 - rsyslog/run \n/usr/sbin/nginx: /azureml-envs/azureml_99474c16f35a92149d1cc6dec149d8e9/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_99474c16f35a92149d1cc6dec149d8e9/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_99474c16f35a92149d1cc6dec149d8e9/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_99474c16f35a92149d1cc6dec149d8e9/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_99474c16f35a92149d1cc6dec149d8e9/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\nrsyslogd: /azureml-envs/azureml_99474c16f35a92149d1cc

In [37]:
# Delete the deployed web service now that the endpoint has been exercised.
service.delete()