In [1]:
import azureml.core
from azureml.core import Workspace

# Connect to the Azure ML workspace described by the locally saved config file
ws = Workspace.from_config()
print(
    'Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION,
        ws.name,
    )
)

Ready to use Azure ML 1.38.0 to work with aml_ws


In [2]:
# Look up the datastore the workspace uses by default
default_ds = ws.get_default_datastore()

# Print every datastore name together with a flag telling whether it is the default
for ds_name in ws.datastores:
    is_default = ds_name == default_ds.name
    print(ds_name, "- Default =", is_default)

skdatastore - Default = True
azureml_globaldatasets - Default = False
workspacefilestore - Default = False
workspaceartifactstore - Default = False
workspaceblobstore - Default = False


In [3]:
from azureml.core import Workspace, Datastore, Dataset
from azureml.data.datapath import DataPath

In [4]:
from azureml.core import Dataset

# Fetch the registered dataset by name and materialise it as a pandas DataFrame
sekurit_ds = Dataset.get_by_name(workspace=ws, name='sekuritdataset')
data = sekurit_ds.to_pandas_dataframe()

In [5]:
# Preview the first rows of the loaded dataset
data.head()

Unnamed: 0,Model,Length,Type,Style,OEM,Engine Disp,Age of OEM,Year,Mileage,Oil Price,Petrol,Automatic,Price,Sales,Column17
0,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Jan-14,16,102.1,True,False,5.0,1207,
1,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Feb-14,16,104.83,True,False,5.0,882,
2,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Mar-14,16,104.04,True,False,5.0,839,
3,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Apr-14,16,104.87,True,False,5.0,547,
4,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,May-14,16,105.71,True,False,5.0,571,


In [6]:
import os

# Local directory that receives the pipeline step scripts and the environment file;
# creating it is a no-op when it already exists.
experiment_folder = 'sekurit_automl_pipeline'
os.makedirs(name=experiment_folder, exist_ok=True)

print(experiment_folder)

sekurit_automl_pipeline


In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster that runs every pipeline step
cluster_name = "computecluster"

try:
    # Reuse the compute target when it already exists in the workspace
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # The lookup failed, so provision a new cluster with at most two nodes
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Show the reason, then re-raise: swallowing the error would leave
        # `pipeline_cluster` undefined and make later cells fail with a NameError.
        print(ex)
        raise

Found existing cluster, use it.


In [8]:
%%writefile $experiment_folder/experiment_env.yml

# Conda specification for the environment used by the pipeline steps.
# The notebook loads this file with Environment.from_conda_specification.
name: experiment_env
dependencies:
# Interpreter version pinned for the remote runs
- python=3.6.2
- scikit-learn
- ipykernel
- matplotlib
- pandas
- pip
# Packages installed through pip rather than conda
- pip:
  - azureml-defaults
  - azureml-sdk[automl]
  # Parquet engines; dataprep.py writes its output with engine="pyarrow"
  - pyarrow
  - fastparquet

Overwriting sekurit_automl_pipeline/experiment_env.yml


In [9]:
from azureml.core import Environment
from azureml.core.runconfig import RunConfiguration

# Build the experiment environment from the conda specification written above
experiment_env = Environment.from_conda_specification(
    name="experiment_env",
    file_path=experiment_folder + "/experiment_env.yml",
)

# Register it in the workspace, then read the registered definition back
experiment_env.register(workspace=ws)
registered_env = Environment.get(workspace=ws, name='experiment_env')

# Run configuration shared by the script steps: it targets the compute cluster
# selected earlier and runs inside the registered environment
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster
pipeline_run_config.environment = registered_env

print("Run configuration created.")

Run configuration created.


In [10]:
import pyarrow

# Write the DataFrame to a local parquet file with the pyarrow engine, leaving out the index
data.to_parquet("test.parquet",engine='pyarrow', index=False)


In [11]:
import pandas as pd

# Read the local parquet file back with the pyarrow engine to check the round trip
df = pd.read_parquet('test.parquet', engine='pyarrow')

In [12]:
# Preview the first rows of the frame read back from parquet
df.head()

Unnamed: 0,Model,Length,Type,Style,OEM,Engine Disp,Age of OEM,Year,Mileage,Oil Price,Petrol,Automatic,Price,Sales,Column17
0,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Jan-14,16,102.1,True,False,5.0,1207,
1,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Feb-14,16,104.83,True,False,5.0,882,
2,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Mar-14,16,104.04,True,False,5.0,839,
3,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Apr-14,16,104.87,True,False,5.0,547,
4,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,May-14,16,105.71,True,False,5.0,571,


## AUTOML

In [13]:
%%writefile $experiment_folder/dataprep.py

# Data preparation script for the pipeline.
# Reads the 'sekuritdataset' input of the current run, cleans and encodes it, and
# writes 'prepped_data.parquet' into the folder passed via --output_path.

# Import libraries
from azureml.core import Run

import os  # needed for os.path.join / os.makedirs when writing the output
import pandas as pd
import numpy as np
import argparse
from sklearn.preprocessing import MinMaxScaler
import pyarrow as pa


# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--output_path', dest='output_path', required=True)
args = parser.parse_args()

# Load the named input dataset of this run and drop the two unused columns
sekurit_ds = Run.get_context().input_datasets['sekuritdataset']
data = sekurit_ds.to_pandas_dataframe().drop(['Column17', 'Year'], axis=1)
# NOTE(review): any other column of the dataset (e.g. an exported index column)
# is kept and therefore becomes a training feature - confirm this is intended.

# Keep only rows whose Sales value parses as a number and is not negative
data['Sales'] = pd.to_numeric(data['Sales'], errors='coerce')
data = data[data['Sales'].notna()]
data = data.drop(data[data.Sales < 0].index)

# Label Encoding: replace each category by its integer code
list_of_columns = ['Model', 'Type', 'Style', 'OEM']
data[list_of_columns] = data[list_of_columns].apply(lambda col: pd.Categorical(col).codes)

# Boolean Encoding: multiplying by 1 turns True/False into 1/0
data[["Petrol", "Automatic"]] *= 1

# Normalize the numeric columns
# NOTE(review): the fitted scaler is not saved anywhere, so scoring code cannot
# apply the same scaling - confirm how inference inputs are expected to be scaled.
scaler = MinMaxScaler()
num_cols = ['Length', 'Engine Disp', 'Age of OEM', 'Mileage', 'Oil Price', 'Price']
data[num_cols] = scaler.fit_transform(data[num_cols])

# Make sure the output folder exists, then save the prepared data as parquet
os.makedirs(args.output_path, exist_ok=True)
data.to_parquet(os.path.join(args.output_path, "prepped_data.parquet"), engine="pyarrow", index=False)



Overwriting sekurit_automl_pipeline/dataprep.py


In [14]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

# Output location that the dataprep script receives through --output_path
prepped_data_path = OutputFileDatasetConfig(name="output_path")

# Pipeline step that runs dataprep.py on the cluster with the registered dataset
# attached as the named input 'sekuritdataset'
dataprep_step = PythonScriptStep(
    name="dataprep",
    script_name="dataprep.py",
    source_directory=experiment_folder,
    arguments=["--output_path", prepped_data_path],
    inputs=[sekurit_ds.as_named_input('sekuritdataset')],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)


In [15]:
# Derive the AutoML training input from the dataprep output location by reading
# it as parquet files; `prepped_data` is passed as `training_data` further down.
prepped_data = prepped_data_path.read_parquet_files()
# NOTE(review): this re-reads the local 'test.parquet' file and is unrelated to the
# pipeline output above - looks like a leftover check; confirm before removing.
df = pd.read_parquet('test.parquet', engine='pyarrow')

In [16]:
from azureml.pipeline.core import TrainingOutput, PipelineData

# Pipeline outputs produced by the AutoML step, both stored on the default datastore.
# The `pipeline_output_name` values are the names used to fetch them from a finished run.
metrics_data = PipelineData(
    name='metrics_data',
    pipeline_output_name='metrics_output',
    training_output=TrainingOutput(type='Metrics'),
    datastore=default_ds,
)

model_data = PipelineData(
    name='best_model_data',
    pipeline_output_name='model_output',
    training_output=TrainingOutput(type='Model'),
    datastore=default_ds,
)


In [17]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep
import logging


# Settings forwarded to AutoMLConfig as keyword arguments
automl_settings = dict(
    iteration_timeout_minutes=10,
    experiment_timeout_hours=0.25,
    enable_early_stopping=True,
    primary_metric='normalized_root_mean_squared_error',
    featurization='auto',
    verbosity=logging.INFO,
    n_cross_validations=2,
)

# Regression task predicting the "Sales" column from the prepared data
automl_config = AutoMLConfig(
    name='Automated ML Experiment',
    task='regression',
    label_column_name="Sales",
    training_data=prepped_data,
    compute_target=pipeline_cluster,
    debug_log='automated_ml_errors.log',
    **automl_settings,
)

# AutoML pipeline step; the default outputs are disabled so that only the two
# explicitly declared PipelineData outputs are produced
train_step = AutoMLStep(
    name='AutoML_Regression',
    automl_config=automl_config,
    passthru_automl_config=False,
    outputs=[metrics_data, model_data],
    enable_default_model_output=False,
    enable_default_metrics_output=False,
    allow_reuse=True,
)

print("Ready for Auto ML run.")

Ready for Auto ML run.


In [18]:
%%writefile $experiment_folder/register_model.py
# Pipeline step script: registers the model found at --model_path in the
# workspace under the name given by --model_name.
from azureml.core.model import Model, Dataset
from azureml.core.run import Run, _OfflineRun
from azureml.core import Workspace
import argparse

# Both command-line arguments are mandatory
parser = argparse.ArgumentParser()
for flag in ("--model_name", "--model_path"):
    parser.add_argument(flag, required=True)
args = parser.parse_args()

print(f"model_name : {args.model_name}")
print(f"model_path: {args.model_path}")

# For an offline run the workspace comes from the local config file;
# otherwise it is taken from the experiment the run belongs to.
run = Run.get_context()
if type(run) == _OfflineRun:
    ws = Workspace.from_config()
else:
    ws = run.experiment.workspace

model = Model.register(
    workspace=ws,
    model_name=args.model_name,
    model_path=args.model_path,
)

print("Registered version {0} of model {1}".format(model.version, model.name))

Overwriting sekurit_automl_pipeline/register_model.py


In [19]:
from azureml.pipeline.core.graph import PipelineParameter

# Pipeline parameter holding the name under which the trained model is registered;
# it can be overridden when the pipeline is submitted.
model_name = PipelineParameter("model_name", default_value="sekurit_automl_model")

# Step that runs register_model.py with the AutoML model output as its input
register_step = PythonScriptStep(
    name="register_model",
    script_name="register_model.py",
    source_directory=experiment_folder,
    arguments=["--model_name", model_name, "--model_path", model_data],
    inputs=[model_data],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=False,
)

In [20]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

# Chain data preparation, AutoML training and model registration into one pipeline
pipeline = Pipeline(ws, [dataprep_step, train_step, register_step])

experiment = Experiment(workspace=ws, name='azml-automl-sekurit-pipeline')

# Submit the pipeline exactly once. The cell used to submit it twice, which ran
# the whole (long) AutoML pipeline a second time.
# regenerate_outputs=True forces every step to run again instead of reusing
# the outputs of a previous run.
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")

# Keep the old `run` name available for code that still refers to it
run = pipeline_run

# RunDetails was used without being imported. It lives in the optional
# azureml-widgets package, so fall back to plain console output if it is missing.
try:
    from azureml.widgets import RunDetails
except ImportError:
    print("azureml.widgets is not installed; skipping the RunDetails widget.")
else:
    RunDetails(pipeline_run).show()

# Block until the pipeline finishes, streaming the step logs
pipeline_run.wait_for_completion(show_output=True)


Created step dataprep [f3aaba66][b499beaa-c79c-45ee-9d15-2f55b2fca5d6], (This step will run and generate new outputs)Created step AutoML_Regression [477e8b31][c8cf4a6d-7ef2-4681-b029-3d79a0280f20], (This step will run and generate new outputs)

Created step register_model [0e5531d1][0656dc56-42ca-4e45-915f-7851e88aaeaa], (This step will run and generate new outputs)
Submitted PipelineRun 638e2247-202f-41af-8116-ab96e70c33b2
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/638e2247-202f-41af-8116-ab96e70c33b2?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
PipelineRunId: 638e2247-202f-41af-8116-ab96e70c33b2
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/638e2247-202f-41af-8116-ab96e70c33b2?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
PipelineRun Status: NotStarted




StepRun(dataprep) Execution Summary
StepRun( dataprep ) Status: Finished
{'runId': '09ed262f-507d-4299-9018-27d0820f674c', 'target': 'computecluster', 'status': 'Completed', 'startTimeUtc': '2022-03-01T20:01:36.825425Z', 'endTimeUtc': '2022-03-01T20:03:20.076008Z', 'services': {}, 'properties': {'ContentSnapshotId': '950a4815-744e-4d88-b7e6-0433ff9b2da0', 'StepType': 'PythonScriptStep', 'ComputeTargetType': 'AmlCompute', 'azureml.moduleid': 'b499beaa-c79c-45ee-9d15-2f55b2fca5d6', 'azureml.moduleName': 'dataprep', 'azureml.runsource': 'azureml.StepRun', 'azureml.nodeid': 'f3aaba66', 'azureml.pipelinerunid': '638e2247-202f-41af-8116-ab96e70c33b2', 'azureml.pipeline': '638e2247-202f-41af-8116-ab96e70c33b2', 'azureml.pipelineComponent': 'masterescloud', '_azureml.ComputeTargetType': 'amlcompute', 'ProcessInfoFile': 'azureml-logs/process_info.json', 'ProcessStatusFile': 'azureml-logs/process_status.json'}, 'inputDatasets': [{'dataset': {'id': '6c4883fa-1f48-4ea1-a6d5-5fecfeedddd5'}, 'consu




StepRunId: 32740f1f-c2dc-4b5f-98d1-7a6f9966dada
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/32740f1f-c2dc-4b5f-98d1-7a6f9966dada?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
StepRun( AutoML_Regression ) Status: Running

StepRun(AutoML_Regression) Execution Summary
StepRun( AutoML_Regression ) Status: Finished

Experiment timeout reached, hence experiment stopped. Current experiment timeout: 0 hour(s) 15 minute(s)






StepRunId: 6ff91d1d-25b8-41cd-8340-776d5c6cc1ee
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/6ff91d1d-25b8-41cd-8340-776d5c6cc1ee?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
StepRun( register_model ) Status: Running

StepRun(register_model) Execution Summary
StepRun( register_model ) Status: Finished
{'runId': '6ff91d1d-25b8-41cd-8340-776d5c6cc1ee', 'target': 'computecluster', 'status': 'Completed', 'startTimeUtc': '2022-03-01T20:28:42.20023Z', 'endTimeUtc': '2022-03-01T20:28:52.231401Z', 'services': {}, 'properties': {'ContentSnapshotId': '950a4815-744e-4d88-b7e6-0433ff9b2da0', 'StepType': 'PythonScriptStep', 'ComputeTargetType': 'AmlCompute', 'azureml.moduleid': '0656dc56-42ca-4e45-915f-7851e88aaeaa', 'azureml.moduleName': 'register_model', 'azureml.runsource': 'azureml.StepRun', 'azureml.nodeid': '0e5531d1', 'azureml.pipelinerunid': '638e2247-202f-41af-8116-ab96e

KeyboardInterrupt: 

## Examine Results

In [None]:
# `metrics_output_name` was never defined. It must match the `pipeline_output_name`
# declared on the metrics PipelineData ('metrics_output').
metrics_output_name = 'metrics_output'

# Fetch the metrics output of the finished pipeline run and download it
# into the current directory
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress=True)

In [None]:
import json
import pandas as pd

# Read the downloaded metrics file as text
with open(metrics_output._path_on_datastore) as metrics_file:
    metrics_output_result = metrics_file.read()

# Parse the JSON document and show it as a DataFrame
deserialized_metrics_output = json.loads(metrics_output_result)
df = pd.DataFrame(deserialized_metrics_output)
df

## Deploy Model as a webservice - Real Time Inference

In [21]:
import azureml.core
from azureml.core import Workspace

# Reconnect to the workspace from the saved config file for the deployment part
ws = Workspace.from_config()
print(
    'Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION,
        ws.name,
    )
)


Ready to use Azure ML 1.38.0 to work with aml_ws


In [22]:
from azureml.core import Model

# Print every registered model with its version, followed by its tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

sekurit_automl_model version: 4


sekurit_automl_model version: 3


sekurit_automl_model version: 2


sekurit_automl_model version: 1


sekurit_model version: 7
	 Training context : Pipeline
	 Accuracy : 0.5057803468208093


sekurit_model_initial version: 2


AutoML0b24ea5a20 version: 1


amlstudio-test-deploy-v2 version: 1
	 CreatedByAMLStudio : true


amlstudio-test-endpoint-lr version: 1
	 CreatedByAMLStudio : true


diabetes_model version: 11
	 Training context : Inline Training
	 AUC : 0.8832778417290374
	 Accuracy : 0.8991111111111111


diabetes_mitigated_19 version: 1


diabetes_mitigated_18 version: 1


diabetes_mitigated_17 version: 1


diabetes_mitigated_16 version: 1


diabetes_mitigated_15 version: 1


diabetes_mitigated_14 version: 1


diabetes_mitigated_13 version: 1


diabetes_mitigated_12 version: 1


diabetes_mitigated_11 version: 1


diabetes_mitigated_10 version: 1


diabetes_mitigated_9 version: 1


diabetes_mitigated_8 version: 1


diabetes_mitigated_7 version: 1



In [23]:
# Look up the registered AutoML model by name in the workspace's model collection;
# this is the model object deployed as a web service below.
model = ws.models['sekurit_automl_model']
print(model.name, 'version', model.version)

sekurit_automl_model version 4


In [24]:
import os

# Local directory that receives the scoring script (score.py);
# creating it is a no-op when it already exists.
deployment_folder = 'deployment_automl'
os.makedirs(name=deployment_folder, exist_ok=True)

print(deployment_folder)

deployment_automl


In [25]:
%%writefile $deployment_folder/score.py

import json
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.schema_decorators import input_schema, output_schema


# Example request record handed to the input schema decorator on run().
# NOTE(review): the categorical fields are raw strings and Petrol/Automatic are
# "Y"/"N" here; presumably the model was trained on encoded values - confirm
# that this sample matches what callers are expected to send.
input_sample = [{
    "Model": "Grand Punto",
    "Length": 4000,
    "Type":"Compact-Regular",
    "Style":"Hatchback",
    "OEM":"Fiat",
    "Engine Disp":1.4,
    "Age of OEM":8,
    "Mileage":16,
    "Oil Price":102.1,
    "Petrol":"Y", 
    "Automatic":"N",
    "Price":5
  }]

# Example response handed to the output schema decorator on run().
# NOTE(review): run() returns a dict of the form {"predict": [...]}, not a nested
# list - confirm whether this sample should mirror that shape.
output_sample = [[1207]]



def init():
    """Load the registered model into the module-level ``model`` variable.

    Resolves the path of the model named "sekurit_automl_model" and
    deserializes the file with joblib.
    """
    global model
    model = joblib.load(Model.get_model_path("sekurit_automl_model"))



# The inference_schema decorators attach the sample input and output to run().
# A schema is generated from them and published as an OpenAPI (Swagger)
# specification at http://<scoring_base_url>/swagger.json
@input_schema('data', StandardPythonParameterType(input_sample))
@output_schema(StandardPythonParameterType(output_sample))
def run(data):
    """Score the submitted records with the loaded model.

    Args:
        data: records of the request; converted to a pandas DataFrame
            before prediction.

    Returns:
        A dict ``{"predict": [...]}`` holding the predictions as a list, or the
        error message as a plain string when anything in the scoring path raises.
    """
    try:
        predictions = model.predict(pd.DataFrame(data))
        return {"predict": predictions.tolist()}
    except Exception as exc:
        # Errors are reported to the caller as text instead of being raised
        return str(exc)

 

Overwriting deployment_automl/score.py


In [26]:
import os

# Local directory that receives the inference environment file;
# creating it is a no-op when it already exists.
config_folder = 'config_automl'
os.makedirs(name=config_folder, exist_ok=True)

print(config_folder)

config_automl


In [27]:
%%writefile $config_folder/inference_env.yml

# Conda specification for the environment of the scoring web service.
# NOTE(review): the name below is "experiment_env", while the notebook creates the
# Environment object from this file under the name "inference-env" - confirm which is intended.
name: experiment_env
dependencies:
- python=3.6.2
- scikit-learn
- ipykernel
- matplotlib
- pandas
- pip
# Packages installed through pip rather than conda
- pip:
  - azureml-defaults
  - azureml-sdk[automl]
  - pyarrow

Overwriting config_automl/inference_env.yml


In [30]:
# Define deployment setup
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
import json
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
    
# Conda environment for the web service, built from the inference spec file
env = Environment.from_conda_specification(
    name="inference-env",
    file_path=config_folder + "/inference_env.yml",
)

# Pair the scoring script with that environment
inference_config = InferenceConfig(
    entry_script=deployment_folder + '/score.py',
    environment=env,
)

# Resources of the Azure Container Instances web service: 1 CPU core, 1 GB of memory
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

In [31]:
# Deploy the selected model as a web service and wait until the deployment ends
service = Model.deploy(
    workspace=ws,
    name="sekurit-automl-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-03-01 20:33:30+00:00 Creating Container Registry if not exists.
2022-03-01 20:33:31+00:00 Registering the environment.
2022-03-01 20:33:33+00:00 Use the existing image.
2022-03-01 20:33:33+00:00 Generating deployment configuration.
2022-03-01 20:33:34+00:00 Submitting deployment to compute..
2022-03-01 20:33:47+00:00 Checking the status of deployment sekurit-automl-service..
2022-03-01 20:36:12+00:00 Checking the status of inference endpoint sekurit-automl-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [34]:
import requests
import json

# Scoring endpoint of the deployed web service
url = service.scoring_uri

# One record to score, wrapped under the 'data' key that score.py's run() expects.
# The categorical fields (Model, Type, Style, OEM) and the two flags (Petrol,
# Automatic) are sent as integers rather than raw strings such as "Grand Punto" or "Y".
# NOTE(review): dataprep.py also min-max scales the numeric columns before training,
# while the values below are unscaled - confirm the expected input scaling.
test_data = {
    'data': [
        {
            "Model": 0,
            "Length": 4000,
            "Type": 0,
            "Style": 0,
            "OEM": 0,
            "Engine Disp": 1.4,
            "Age of OEM": 8,
            "Mileage": 16,
            "Oil Price": 102.1,
            "Petrol": 0,
            "Automatic": 0,
            "Price": 5,
        }
    ]
}

# Post the record as JSON and print the raw response body
headers = {'Content-Type':'application/json'}
resp = requests.post(url, json=test_data, headers=headers)

print("Prediction:", resp.text)

Prediction: {"predict": [281.9847222222222]}
