## Configuring Data Collector in Azure Machine Learning (AzureML)

### System configuration

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    Data,
    DataCollector,
    DeploymentCollection
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [None]:
# enter details of your Azure Machine Learning workspace
subscription_id = ""
resource_group_name = ""
workspace_name = ""

In [3]:
# get a handle to the workspace
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id = subscription_id,
    resource_group_name = resource_group_name,
    workspace_name = workspace_name
)
ml_client

MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7f6255880280>,
         subscription_id=e0d7a68e-191f-4f51-83ce-d93995cd5c09,
         resource_group_name=rg_mip,
         workspace_name=ws_mip)

In [None]:
# define an endpoint name
endpoint_name = "endpoint-data-collector-test"

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name = endpoint_name, 
    description="Endpoint to test data collector",
    auth_mode="key"
)
endpoint

ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': None, 'scoring_uri': None, 'openapi_uri': None, 'name': 'endpoint-data-collector-ez-test', 'description': 'Endpoint to test data collector', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': None, 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/ezzatdemnati-ci2/code/Users/ezzatdemnati/AzureML-Monitoring-DataCollector', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f6254286ce0>, 'auth_mode': 'key', 'location': None, 'identity': None, 'traffic': {}, 'mirror_traffic': {}, 'kind': None})

### Option 1: Local model deployment

In [47]:
# define local model and environment
model = Model(path = "./model/sklearn_regression_model.pkl")

env = Environment(
    conda_file = "./environment/conda.yaml",
    name="my_env",
    image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
)

### Option 2: Registered model deployment

In [58]:
file_model = Model(
    path="./model/",
    type=AssetTypes.CUSTOM_MODEL,
    name="scikit-model",
    description="SciKit model created from local file",
)
my_model = ml_client.models.create_or_update(file_model)
my_model

Model({'job_name': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'scikit-model', 'description': 'SciKit model created from local file', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/e0d7a68e-191f-4f51-83ce-d93995cd5c09/resourceGroups/rg_mip/providers/Microsoft.MachineLearningServices/workspaces/ws_mip/models/scikit-model/versions/3', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/ezzatdemnati-ci2/code/Users/ezzatdemnati/AzureML-Monitoring-DataCollector', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f623b55d3c0>, 'serialize': <msrest.serialization.Serializer object at 0x7f623b55f4c0>, 'version': '3', 'latest_version': None, 'path': 'azureml://subscriptions/e0d7a68e-191f-4f51-83ce-d93995cd5c09/resourceGroups/rg_mip/workspaces/ws_mip/datastores/workspaceblobstore/paths/LocalUpload/feaceb82707cfcabaae

In [None]:
from azure.ai.ml.entities import Environment

env_docker_conda = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file="./environment/conda.yaml",
    name="scikit-env",
    description="SciKit env from Docker image and Conda file",
)
my_env = ml_client.environments.create_or_update(env_docker_conda)

Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': 'mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'scikit-env', 'description': 'SciKit env from Docker image and Conda file', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/e0d7a68e-191f-4f51-83ce-d93995cd5c09/resourceGroups/rg_mip/providers/Microsoft.MachineLearningServices/workspaces/ws_mip/environments/scikit-env/versions/1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/ezzatdemnati-ci2/code/Users/ezzatdemnati/AzureML-Monitoring-DataCollector', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f62547562c0>, 'serialize': <msrest.serialization.Serializer object at 0x7f6254754700>, 'version': '1', 'conda_file': {'channels': ['conda-forge'], 'dependencie

In [38]:
# define registered model and environment
#model = "scikit-model:1"
#env = "scikit-env:1"

### Configuring managed endpoint

In [59]:
# define data collector
# input_data_asset = Data(
#     name = 'model_inputs',
#     version ='1',
#     path = 'azureml://datastores/model_inputs/paths/model_inputs'
# )
# output_data_asset = Data(
#     name = 'model_outputs',
#     version = '1',
#     path = 'azureml://datastores/model_inputs/paths/model_outputs'
# )

collections = {
    'model_inputs': DeploymentCollection(
        enabled="true",
        # data=input_data_asset
    ),
    'model_outputs': DeploymentCollection(
        enabled="true",
        # data=output_data_asset
    )
}

data_collector = DataCollector(collections=collections)

In [60]:
# create endpoint
ml_client.online_endpoints.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x7f623b8bbfd0>

In [66]:
# check endpoint
my_endpoint = ml_client.online_endpoints.get(name=endpoint_name)

In [62]:
# define deployment
blue_deployment = ManagedOnlineDeployment(
    name="blue-dep",
    endpoint_name=endpoint_name,
    model=my_model,
    environment=my_env,
    code_configuration=CodeConfiguration(
        code="./onlinescoring", scoring_script="score_datacollector.py"
    ),
    instance_type="Standard_DS2_v2",
    instance_count=1,
    data_collector=data_collector
)

In [63]:
# create deployment
ml_client.online_deployments.begin_create_or_update(blue_deployment)

Instance type Standard_DS2_v2 may be too small for compute resources. Minimum recommended compute SKU is Standard_DS3_v2 for general purpose endpoints. Learn more about SKUs here: https://learn.microsoft.com/en-us/azure/machine-learning/referencemanaged-online-endpoints-vm-sku-list
Check: endpoint endpoint-data-collector-ez-test exists
[32mUploading onlinescoring (0.0 MBs): 100%|██████████| 2618/2618 [00:00<00:00, 58571.17it/s]
[39m



<azure.core.polling._poller.LROPoller at 0x7f623bbbf3d0>

.....................................................................................................

In [69]:
endpoint = ml_client.online_endpoints.get(endpoint_name)
endpoint

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://endpoint-data-collector-ez-test.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://endpoint-data-collector-ez-test.eastus.inference.ml.azure.com/swagger.json', 'name': 'endpoint-data-collector-ez-test', 'description': 'Endpoint to test data collector', 'tags': {}, 'properties': {'createdBy': 'Ezzat Demnati', 'createdAt': '2024-11-26T18:02:24.162367+0000', 'lastModifiedAt': '2024-11-26T18:02:24.162367+0000', 'azureml.onlineendpointid': '/subscriptions/e0d7a68e-191f-4f51-83ce-d93995cd5c09/resourcegroups/rg_mip/providers/microsoft.machinelearningservices/workspaces/ws_mip/onlineendpoints/endpoint-data-collector-ez-test', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e0d7a68e-191f-4f51-83ce-d93995cd5c09/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oeidp:deaf618c-e3a5-4db2-b672-b0ad927d2ce3:37db706d-2976-

In [78]:
# blue deployment takes 100 traffic
endpoint.traffic = {"blue-dep": 100}
ml_client.online_endpoints.begin_create_or_update(endpoint)

Readonly attribute principal_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>
Readonly attribute tenant_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>


<azure.core.polling._poller.LROPoller at 0x7f623b542320>

In [74]:
# test the blue deployment with sample data
ml_client.online_endpoints.invoke(
    endpoint_name=endpoint_name,
    deployment_name="blue-dep",
    request_file="./sample-request.json",
)

'[11055.977245525679, 4503.079536107787]'

### Delete managed endpoint deployment

In [75]:
# delete deployment
#ml_client.online_endpoints.begin_delete(name=endpoint_name)