## Deploy a model to a local endpoint using the Azure Machine Learning Python SDK v2
### Example with fMRI use case
For reference, see the Azure Machine Learning tutorial on [deploying a model as an online endpoint](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-model?view=azureml-api-2).

In [None]:
! pip install azure-ai-ml

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    OnlineRequestSettings
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [2]:
# Workspace coordinates: replace each placeholder with the values of your own
# Azure Machine Learning workspace before running this cell.
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace_name = "<AML_WORKSPACE_NAME>"

# Credential object used to authenticate the client below.
credential = DefaultAzureCredential()

# Client handle bound to the workspace identified above; every later cell
# talks to Azure ML (or to the local endpoint) through it.
ml_client = MLClient(
    workspace_name=workspace_name,
    resource_group_name=resource_group,
    subscription_id=subscription_id,
    credential=credential,
)

In [3]:
# Runtime environment for the deployment: a base Docker image plus the conda
# specification file that is layered on top of it.
env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="conda_dep_opti.yml",
)

# Inference code: the directory holding the scoring code and the name of the
# scoring script inside it.
code_config = CodeConfiguration(code="fmri_score_code", scoring_script="score_opti.py")

In [4]:
# Folder (relative to this notebook) that is packaged as the model asset.
folder_data_model_path = "../fmri-data-pt-onnx-ov-models"

# Model entity built from that local folder; it is handed to the deployment later on.
local_model = Model(
    name="fmri-data-pt-onnx-ov-v2sdk",
    version="1",
    type=AssetTypes.CUSTOM_MODEL,
    path=folder_data_model_path,
    description=(
        "SDKv2-fmri-data-pt-onnx-ov-models with PT, ONNX and OV models of fMRI - final25D model. "
        "Also includes 100 IC_niftis test volumes (*.nii.gz)"
    ),
)

In [5]:
# Name shared by the endpoint and by every later call that looks it up.
endpoint_name = "fmri-pt-ipex-ov-local-sdk-v2"

# Describe the online endpoint (key-based auth) ...
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    auth_mode="key",
    description="this is local: fmri-pt-ipex-ov-local-sdk-v2",
)

# ... and create/update it locally (local=True) instead of in the cloud.
# NOTE(review): the captured output of the later traffic cell shows the local
# variant of begin_create_or_update returning the endpoint entity itself, so
# there is presumably nothing to wait on here; confirm before renaming `poller`.
poller = ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

Updating local endpoint (fmri-pt-ipex-ov-local-sdk-v2) .Done (0m 5s)


In [9]:

# Request settings for the deployment: request timeout of 36000 ms (36 s).
req_settings = OnlineRequestSettings(request_timeout_ms=36000)

# The "blue" deployment ties together the model, its environment and the
# scoring code defined in the earlier cells.
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=local_model,
    code_configuration=code_config,
    environment=env,
    request_settings=req_settings,
    instance_count=1,
    instance_type="Standard_FX4mds",  # SKUs noted by the author: Standard_FX4mds, Standard_F2s_v2
)

# Create (or update) the deployment locally; the returned entity is the cell's output.
ml_client.begin_create_or_update(blue_deployment, local=True)



Updating local deployment (fmri-pt-ipex-ov-local-sdk-v2 / blue) .
Building Docker image from Dockerfile
Step 1/6 : FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
 ---> 2a70613828e9
Step 2/6 : RUN mkdir -p /var/azureml-app/
 ---> Using cache
 ---> b9e650e0cd57
Step 3/6 : WORKDIR /var/azureml-app/
 ---> Using cache
 ---> dd0dc0799daf
Step 4/6 : COPY conda.yml /var/azureml-app/
 ---> Using cache
 ---> 53af016f6d16
Step 5/6 : RUN conda env create -n inf-conda-env --file conda.yml
 ---> Using cache
 ---> 1768010f34bd
Step 6/6 : CMD ["conda", "run", "--no-capture-output", "-n", "inf-conda-env", "runsvdir", "/var/runit"]
 ---> Using cache
 ---> 1c963946291f
Successfully built 1c963946291f
Successfully tagged fmri-pt-ipex-ov-local-sdk-v2:blue

Starting up endpoint...Done (0m 20s)


ManagedOnlineDeployment({'private_network_connection': None, 'provisioning_state': 'Succeeded', 'endpoint_name': 'fmri-pt-ipex-ov-local-sdk-v2', 'type': 'Managed', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/ravi-nuance/code/Users/ravi.panchumarthy/03.fMRI.RestingStateClassification/4.2.Deploy_the_model(optimized)_sdk_v2'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7fb025051460>, 'model': Model({'job_name': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'fmri-data-pt-onnx-ov-v2sdk', 'description': 'SDKv2-fmri-data-pt-onnx-ov-models with PT, ONNX and OV models of fMRI - final25D model. Also includes 100 IC_niftis test volumes (*.nii.gz)', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Re

In [10]:
# Send all (100%) of the endpoint's traffic to the "blue" deployment, then
# push the updated endpoint definition to the local endpoint.
endpoint.traffic = dict(blue=100)
ml_client.begin_create_or_update(endpoint, local=True)

Updating local endpoint (fmri-pt-ipex-ov-local-sdk-v2) .Done (0m 5s)


ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': 'Succeeded', 'scoring_uri': 'http://localhost:32782/score', 'openapi_uri': None, 'name': 'fmri-pt-ipex-ov-local-sdk-v2', 'description': 'this is local: fmri-pt-ipex-ov-local-sdk-v2', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/ravi-nuance/code/Users/ravi.panchumarthy/03.fMRI.RestingStateClassification/4.2.Deploy_the_model(optimized)_sdk_v2'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7fb024ff1940>, 'auth_mode': 'key', 'location': 'local', 'identity': None, 'traffic': {'blue': 100}, 'mirror_traffic': {}, 'kind': None})

In [11]:
# Look the local endpoint up by name and display it as the cell output.
ml_client.online_endpoints.get(endpoint_name, local=True)

ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': 'Succeeded', 'scoring_uri': 'http://localhost:32782/score', 'openapi_uri': None, 'name': 'fmri-pt-ipex-ov-local-sdk-v2', 'description': 'this is local: fmri-pt-ipex-ov-local-sdk-v2', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/mnt/batch/tasks/shared/LS_root/mounts/clusters/ravi-nuance/code/Users/ravi.panchumarthy/03.fMRI.RestingStateClassification/4.2.Deploy_the_model(optimized)_sdk_v2'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7fb02c09b9a0>, 'auth_mode': 'key', 'location': 'local', 'identity': None, 'traffic': {'blue': 100}, 'mirror_traffic': {}, 'kind': None})

In [None]:
# Fetch up to 500 log lines from the local "blue" deployment.
# The logs are printed rather than left as a bare expression: a bare expression
# shows the string's repr, i.e. one long line with escaped "\n" sequences.
deployment_logs = ml_client.online_deployments.get_logs(
    name="blue", endpoint_name=endpoint_name, local=True, lines=500
)
print(deployment_logs)

In [13]:
# Re-read the local endpoint so the values below reflect its current state.
endpoint_deployed = ml_client.online_endpoints.get(name=endpoint_name, local=True)

# First line: current traffic allocation; second line: scoring URI.
print(endpoint_deployed.traffic, endpoint_deployed.scoring_uri, sep="\n")

{'blue': 100}
http://localhost:32782/score


In [None]:
import json

import requests

# Scoring URL of the endpoint fetched in the previous cell.
scoring_uri = endpoint_deployed.scoring_uri

# Send HTTP request and obtain results from endpoint.
# Note: in this example, the input data is already in the container uploaded along with the models during model registration.
response = requests.post(scoring_uri)

# Stop here with the HTTP status if the endpoint answered with an error,
# instead of failing below with a less helpful JSON decoding error.
response.raise_for_status()

# Parse the JSON body once and pretty-print it.
output_dict = response.json()
print(json.dumps(output_dict, indent=4))

In [None]:
import json

# Parse the scoring response (the `response` object comes from the request cell).
output_dict = json.loads(response.content)

# Per-framework summaries, as keyed in the scoring response.
pt_metrics = output_dict['pt_summary']
ipex_metrics = output_dict['ipex_summary']
ov_metrics = output_dict['ov_summary']

# One identically formatted report per framework. Every report except the
# first is preceded by a blank line.
frameworks = (("PyTorch", pt_metrics), ("IPEX", ipex_metrics), ("OpenVINO", ov_metrics))
for position, (label, metrics) in enumerate(frameworks):
    lead = "" if position == 0 else "\n"
    print(f"{lead}{label} Metrics:")
    print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions'][label]}")
    print(f"\tNum Subjects:\t{metrics['num_subjects']}")
    print(f"\tTest Accuracy:\t{metrics['test_accuracy']}")
    print(f"\tTime Taken:\t{metrics['time_sec']:.4f} sec")

# Speedups are ratios of elapsed time, with PyTorch as the baseline.
ipex_fps_speedup = pt_metrics['time_sec'] / ipex_metrics['time_sec']
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

ov_fps_speedup = pt_metrics['time_sec'] / ov_metrics['time_sec']
print(f"\nSpeedup with OpenVINO: {ov_fps_speedup:.2f}x")

In [None]:
# Print the system information reported in the scoring response.
def _unescape(text):
    """Return `text` with its backslash escape sequences (e.g. a literal \\n) interpreted."""
    return text.encode().decode('unicode_escape')


# CPU description.
lscpu_out = _unescape(output_dict['system_info']['lscpu_out'])
print(f"\nSystem Info:\n{lscpu_out}")

# Memory figures (GB, per the key name).
mem_out_gb = _unescape(output_dict['system_info']['mem_out_gb'])
print(f"\nSystem Memory Info (GB):\n{mem_out_gb}")

# Operating system description.
os_out = _unescape(output_dict['system_info']['os'])
print(f"\nSystem OS:\n{os_out}")

In [None]:
# Clean up: delete the *local* endpoint created earlier in this notebook.
# local=True is required here. Without it the call targets a cloud endpoint
# with this name in the workspace and leaves the local endpoint running.
ml_client.online_endpoints.begin_delete(name=endpoint_name, local=True)