# Machine Learning Lab - Automated ML in Azure ML Studio

## Get Workspace Info

In [6]:
# Import required libraries
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import automl
from azure.ai.ml import Input

import pandas as pd

In [2]:
# Create an Azure credential object and use it to build the MLClient from the
# workspace config file (config.json) that the SDK locates on disk.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential)

Found the config file in: /config.json


In [3]:
# Fetch the workspace entity for the workspace this client is bound to.
workspace = ml_client.workspaces.get(name=ml_client.workspace_name)

## Preview Your Flat File Data

In [7]:
# Datastore URI of the raw CSV file, split by path segment for readability.
# Adjacent string literals are concatenated, so the value is unchanged.
uri = (
    "azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601"
    "/resourcegroups/rg-dsba6190-class-dev-eastus-001"
    "/workspaces/mls-dsba6190-class-dev-eastus-001"
    "/datastores/stodsba6190class"
    "/paths/instructor/diabetes.csv"
)

# Load the file into a DataFrame and preview the first rows.
df = pd.read_csv(uri)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


## Create an MLTable Object

In [27]:
import mltable

# Describe the delimited (CSV) file at `uri` as an MLTable: comma-separated,
# with the same header row in every matched file.
csv_paths = [{"pattern": uri}]
tbl = mltable.from_delimited_files(
    paths=csv_paths,
    header="all_files_same_headers",
    delimiter=",",
)

# Materialize the first 5 records as a sanity check.
tbl.show(5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [29]:
# Datastore folder that will hold the saved MLTable definition.
mltable_uri = (
    "azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601"
    "/resourcegroups/rg-dsba6190-class-dev-eastus-001"
    "/workspaces/mls-dsba6190-class-dev-eastus-001"
    "/datastores/stodsba6190class"
    "/paths/instructor/diabetes_mltable"
)

# Write the MLTable to that folder, replacing any previous copy. The call is
# the last expression in the cell so that its return value is displayed.
tbl.save(
    path=mltable_uri,
    colocated=True,
    show_progress=True,
    overwrite=True,
)

Copying 1 files with concurrency set to 1
Copied stodsba6190class/instructor/diabetes.csv, file 1 out of 1. Destination path: https://stodsba6190class.blob.core.windows.net/datalake/instructor/diabetes_mltable/stodsba6190class/instructor/diabetes.csv
Files copied=1, skipped=0, failed=0
Copying 1 files with concurrency set to 1
Copied /tmp/tmp_kql72tz/MLTable, file 1 out of 1. Destination path: https://stodsba6190class.blob.core.windows.net/datalake/instructor/diabetes_mltable/MLTable
Files copied=1, skipped=0, failed=0


paths:
- pattern: azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001/datastores/stodsba6190class/paths/instructor/diabetes.csv
transformations:
- read_delimited:
    delimiter: ','
    empty_as_string: false
    encoding: utf8
    header: all_files_same_headers
    include_path_column: false
    infer_column_types: true
    partition_size: 20971520
    path_column: Path
    support_multi_line: false
type: mltable

## Create Experiment

In [30]:
# Wrap the saved MLTable location in an MLTABLE-typed Input; it is passed to
# the AutoML job as its training data.
my_training_data_input = Input(type=AssetTypes.MLTABLE, path=mltable_uri)

In [31]:
# general job parameters
max_trials = 5
exp_name = "instructor-diabetes-experiment"

In [38]:
# Define an AutoML regression task that predicts the "target" column and
# ranks candidate models by R2 score.
regression_job = automl.regression(
    training_data=my_training_data_input,
    target_column_name="target",
    primary_metric="R2Score",
    enable_model_explainability=True,
    experiment_name=exp_name,
    # n_cross_validations=5,
)

# Limits are all optional; they are collected in one mapping and applied once.
job_limits = dict(
    timeout_minutes=600,
    trial_timeout_minutes=20,
    max_trials=max_trials,
    enable_early_termination=True,
    # max_concurrent_trials=4,
    # max_cores_per_trial=-1,
)
regression_job.set_limits(**job_limits)

## Submit Job

In [39]:
# Send the configured AutoML job to the workspace backend and keep the
# returned job object for later cells.
returned_job = ml_client.jobs.create_or_update(regression_job)

print("Created job: {}".format(returned_job))

Created job: compute: azureml:/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/computes/
creation_context:
  created_at: '2024-10-02T18:33:47.649177+00:00'
  created_by: Colby Ford
  created_by_type: User
display_name: strong_line_rf29xlpdw0
experiment_name: instructor-diabetes-experiment
id: azureml:/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/jobs/strong_line_rf29xlpdw0
limits:
  enable_early_termination: true
  max_concurrent_trials: 1
  max_cores_per_trial: -1
  max_nodes: 1
  max_trials: 5
  timeout_minutes: 600
  trial_timeout_minutes: 20
log_verbosity: info
name: strong_line_rf29xlpdw0
outputs: {}
primary_metric: r2_score
properties: {}
queue_settings:
  job_tier: 'null'
resources:
  instance_count: 

In [40]:
# Wait for job to complete and stream updates
# (returned_job.name is the identifier reported as "RunId" in the streamed log)
ml_client.jobs.stream(returned_job.name)

RunId: strong_line_rf29xlpdw0
Web View: https://ml.azure.com/runs/strong_line_rf29xlpdw0?wsid=/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001

Execution Summary
RunId: strong_line_rf29xlpdw0
Web View: https://ml.azure.com/runs/strong_line_rf29xlpdw0?wsid=/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001



In [41]:
# Get a URL for the status of the job (the Azure ML Studio web view of this run)
returned_job.services["Studio"].endpoint

'https://ml.azure.com/runs/strong_line_rf29xlpdw0?wsid=/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001&tid=88d59d7d-aecb-41b2-90c5-55595de02536'

In [42]:
# Show the job name; the same value is used later as the MLflow parent run id.
print(returned_job.name)

strong_line_rf29xlpdw0


## Get Best Model

In [43]:
# Look up the MLflow tracking URI exposed by the current workspace.
import mlflow

current_workspace = ml_client.workspaces.get(name=ml_client.workspace_name)
MLFLOW_TRACKING_URI = current_workspace.mlflow_tracking_uri

print(MLFLOW_TRACKING_URI)

azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001


In [44]:
# Point MLflow at the workspace tracking server, then read the setting back
# to confirm it took effect.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

active_tracking_uri = mlflow.get_tracking_uri()
print(f"\nCurrent tracking uri: {active_tracking_uri}")


Current tracking uri: azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001


In [45]:
from mlflow.tracking.client import MlflowClient
from mlflow.artifacts import download_artifacts  # NOTE(review): not used in the cells visible here

# Initialize MLFlow client (no arguments are passed, so it relies on the
# tracking URI configured through mlflow.set_tracking_uri)
mlflow_client = MlflowClient()

  from google.protobuf import service as _service


In [46]:
# Identify the AutoML parent job: by default, the job submitted above.
job_name = returned_job.name

# Example of providing a specific job name/ID instead:
# job_name = "b4e95546-0aa1-448e-9ad6-002e3207b4fc"

# Retrieve the parent run from MLflow and show it.
mlflow_parent_run = mlflow_client.get_run(job_name)

print("Parent Run: ", mlflow_parent_run, sep="\n")

Parent Run: 
<Run: data=<RunData: metrics={'explained_variance': 0.4066304793439654,
 'mean_absolute_error': 47.68803490259772,
 'mean_absolute_percentage_error': 42.50887432554903,
 'median_absolute_error': 40.378502711263664,
 'normalized_mean_absolute_error': 0.14856085639438543,
 'normalized_median_absolute_error': 0.12578972807247252,
 'normalized_root_mean_squared_error': 0.1833608242948157,
 'normalized_root_mean_squared_log_error': 0.16958787655458055,
 'r2_score': 0.39160689582214864,
 'root_mean_squared_error': 58.85882459863584,
 'root_mean_squared_log_error': 0.4394408952163837,
 'spearman_correlation': 0.6298372225733961}, params={}, tags={'_azureml.ComputeTargetType': '',
 'automl_best_child_run_id': 'strong_line_rf29xlpdw0_4',
 'fit_time_000': '0.03377429999999999;0.3471856000000001;0.35274910000000004;3;3',
 'iteration_000': '0;1;2;3;4',
 'mlflow.rootRunId': 'strong_line_rf29xlpdw0',
 'mlflow.runName': 'strong_line_rf29xlpdw0',
 'mlflow.user': 'Colby Ford',
 'model_expl

In [47]:
# Print parent run tags. 'automl_best_child_run_id' tag should be there.
# Its value is the run id used to look up the best child run.
print(mlflow_parent_run.data.tags)

{'model_explain_run': 'best_run', '_azureml.ComputeTargetType': '', 'pipeline_id_000': 'faf12f74cf9bbd358ca5525682c5030d36f7be7c;4bc4ec47eb8df2d5d68b361cd60120e65196f757;5cc37daec73ea64276ef956449645cdb519fdfc6;__AutoML_Ensemble__;__AutoML_Stack_Ensemble__', 'score_000': '0.37403560668002445;0.30279414163217666;-1;0.3787865947255294;0.39160689582214864', 'predicted_cost_000': '0;0;0.5;0;0', 'fit_time_000': '0.03377429999999999;0.3471856000000001;0.35274910000000004;3;3', 'training_percent_000': '100;100;100;100;100', 'iteration_000': '0;1;2;3;4', 'run_preprocessor_000': 'MaxAbsScaler;MaxAbsScaler;StandardScalerWrapper;;', 'run_algorithm_000': 'LightGBM;XGBoostRegressor;XGBoostRegressor;VotingEnsemble;StackEnsemble', 'automl_best_child_run_id': 'strong_line_rf29xlpdw0_4', 'mlflow.rootRunId': 'strong_line_rf29xlpdw0', 'mlflow.runName': 'strong_line_rf29xlpdw0', 'mlflow.user': 'Colby Ford'}


In [48]:
# Resolve the best child run: read its id from the parent run's tags, then
# fetch the run itself from MLflow.
parent_tags = mlflow_parent_run.data.tags
best_child_run_id = parent_tags["automl_best_child_run_id"]
print(f"Found best child run id:  {best_child_run_id}")

best_run = mlflow_client.get_run(best_child_run_id)

print("Best child run: ", best_run, sep="\n")

Found best child run id:  strong_line_rf29xlpdw0_4
Best child run: 
<Run: data=<RunData: metrics={'explained_variance': 0.4066304793439654,
 'mean_absolute_error': 47.68803490259772,
 'mean_absolute_percentage_error': 42.50887432554903,
 'median_absolute_error': 40.378502711263664,
 'normalized_mean_absolute_error': 0.14856085639438543,
 'normalized_median_absolute_error': 0.12578972807247252,
 'normalized_root_mean_squared_error': 0.1833608242948157,
 'normalized_root_mean_squared_log_error': 0.16958787655458055,
 'r2_score': 0.39160689582214864,
 'root_mean_squared_error': 58.85882459863584,
 'root_mean_squared_log_error': 0.4394408952163837,
 'spearman_correlation': 0.6298372225733961}, params={}, tags={'mlflow.parentRunId': 'strong_line_rf29xlpdw0',
 'mlflow.rootRunId': 'strong_line_rf29xlpdw0',
 'mlflow.runName': 'serene_book_gqtg3l8t',
 'mlflow.source.name': 'automl_driver.py',
 'mlflow.source.type': 'JOB',
 'mlflow.user': 'Colby Ford'}>, info=<RunInfo: artifact_uri='azureml://ea

In [49]:
# Get best model run's metrics (a dict mapping metric name -> value)
best_run.data.metrics

{'root_mean_squared_log_error': 0.4394408952163837,
 'normalized_median_absolute_error': 0.12578972807247252,
 'normalized_root_mean_squared_log_error': 0.16958787655458055,
 'spearman_correlation': 0.6298372225733961,
 'mean_absolute_error': 47.68803490259772,
 'root_mean_squared_error': 58.85882459863584,
 'r2_score': 0.39160689582214864,
 'median_absolute_error': 40.378502711263664,
 'normalized_mean_absolute_error': 0.14856085639438543,
 'normalized_root_mean_squared_error': 0.1833608242948157,
 'explained_variance': 0.4066304793439654,
 'mean_absolute_percentage_error': 42.50887432554903}

## Deploying the Best Model as an API Endpoint

### Create Managed Endpoint

In [64]:
# import required libraries
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    ProbeSettings,
)
from azure.ai.ml.constants import ModelType

In [65]:
# Define the online endpoint.
# NOTE: the endpoint name below is a fixed string. `datetime` is imported but
# is not used to append a unique suffix, so edit the name manually if it
# conflicts with an existing endpoint.
import datetime

online_endpoint_name = "instructor-diabetes-endpoint-01"

# create an online endpoint (definition only; key-based authentication)
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="online endpoint for the instructor diabetes AutoML model",
    auth_mode="key"
)

In [66]:
# Submit the endpoint definition and wait on the returned operation's result.
ml_client.begin_create_or_update(endpoint).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://instructor-diabetes-endpoint-01.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://instructor-diabetes-endpoint-01.eastus.inference.ml.azure.com/swagger.json', 'name': 'instructor-diabetes-endpoint-01', 'description': 'online endpoint for the instructor diabetes AutoML model', 'tags': {}, 'properties': {'createdBy': 'Colby Ford', 'createdAt': '2024-10-02T20:02:24.462416+0000', 'lastModifiedAt': '2024-10-02T20:02:24.462416+0000', 'azureml.onlineendpointid': '/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/providers/microsoft.machinelearningservices/workspaces/mls-dsba6190-class-dev-eastus-001/onlineendpoints/instructor-diabetes-endpoint-01', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/providers/Microsoft.MachineLearningServices/locations/eastus/mf

### Register Best Model

In [67]:
# Register the best run's MLflow model artifact as a named workspace model.
model_name = "instructor-diabetes-model"
best_model_path = (
    f"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/mlflow-model/"
)

model = Model(
    name=model_name,
    path=best_model_path,
    type=AssetTypes.MLFLOW_MODEL,
    description="Instructor diabetes regression AutoML model",
)

registered_model = ml_client.models.create_or_update(model)

In [68]:
# Full resource id of the registered model, including its version number.
registered_model.id

'/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/models/instructor-diabetes-model/versions/2'

### Deploy

In [69]:
# Health-probe settings for the deployment container. The two probes differ
# in failure threshold and timeout.
liveness_settings = ProbeSettings(
    failure_threshold=30,
    success_threshold=1,
    timeout=2,
    period=10,
    initial_delay=2000,
)
readiness_settings = ProbeSettings(
    failure_threshold=10,
    success_threshold=1,
    timeout=10,
    period=10,
    initial_delay=2000,
)

# Deployment definition: a single Standard_DS3_V2 instance serving the
# registered model behind the endpoint created earlier.
deployment = ManagedOnlineDeployment(
    name="instructor-diabetes-deploy",
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    instance_type="Standard_DS3_V2",
    instance_count=1,
    liveness_probe=liveness_settings,
    readiness_probe=readiness_settings,
)

In [70]:
# Submit the deployment definition and wait on the returned operation's result.
ml_client.online_deployments.begin_create_or_update(deployment).result()

Check: endpoint instructor-diabetes-endpoint-01 exists


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'instructor-diabetes-endpoint-01', 'type': 'Managed', 'name': 'instructor-diabetes-deploy', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/odidp:25d84a7a-c070-46c4-b283-0de91b6aa4e9:9ec20e6b-cd99-4bec-9341-5f711a55ac41?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/onlineEndpoints/instructor-diabetes-endpoint-01/deployments/instructor-diabetes-deploy', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cford38-compute/code'

In [71]:
# Assign 100% of the endpoint's traffic to the new deployment, then submit
# the updated endpoint definition.
endpoint.traffic = {"instructor-diabetes-deploy": 100}
ml_client.begin_create_or_update(endpoint).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://instructor-diabetes-endpoint-01.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://instructor-diabetes-endpoint-01.eastus.inference.ml.azure.com/swagger.json', 'name': 'instructor-diabetes-endpoint-01', 'description': 'online endpoint for the instructor diabetes AutoML model', 'tags': {}, 'properties': {'createdBy': 'Colby Ford', 'createdAt': '2024-10-02T20:02:24.462416+0000', 'lastModifiedAt': '2024-10-02T20:02:24.462416+0000', 'azureml.onlineendpointid': '/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/providers/microsoft.machinelearningservices/workspaces/mls-dsba6190-class-dev-eastus-001/onlineendpoints/instructor-diabetes-endpoint-01', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/providers/Microsoft.MachineLearningServices/locations/eastus/mf

### Test the API

In [75]:
import json

import pandas as pd

uri = "azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001/datastores/stodsba6190class/paths/instructor/diabetes.csv"

# Use the first two rows as a sample request. Drop the label column so that
# only feature columns are sent to the scoring endpoint.
test_df = pd.read_csv(uri).head(2).drop(columns=["target"])

# Go through DataFrame.to_json so that pandas handles value serialization
# (e.g. NaN -> null), then wrap the records in the same
# {"input_data": {"data": [...]}} envelope the request used before.
test_records = json.loads(test_df.to_json(orient="records"))
data = json.dumps({"input_data": {"data": test_records}}, indent=4)

In [76]:
request_file_name = "sample-request-diabetes.json"

# Persist the JSON payload to disk; invoke() reads the request from a file.
with open(request_file_name, "w", encoding="utf-8") as request_file:
    request_file.write(data)

# Score the sample request against the deployment created above. The file
# name variable is reused so the path written and the path sent cannot drift.
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="instructor-diabetes-deploy",
    request_file=request_file_name,
)

'[158.49739290029717, 87.1367118232485]'

### Get Endpoint Details

In [77]:
# Re-fetch the endpoint from the workspace so the values shown reflect its
# current state.
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

# First line: traffic split per deployment. Second line: scoring URI.
print(endpoint.traffic, endpoint.scoring_uri, sep="\n")

{'instructor-diabetes-deploy': 100}
https://instructor-diabetes-endpoint-01.eastus.inference.ml.azure.com/score


In [None]:
# Delete the deployment and endpoint
# ml_client.online_endpoints.begin_delete(name=online_endpoint_name)