## Deploy a model to an online endpoint, using Azure Machine Learning Python SDK v2.
### Example with fMRI use case
For reference, [click here](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-deploy-model?view=azureml-api-2)

#### Prerequisites

In [None]:
! pip install azure-ai-ml

In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    OnlineRequestSettings
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [2]:
# enter details of your AML workspace
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace_name = "<AML_WORKSPACE_NAME>"

# authenticate
credential = DefaultAzureCredential()

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [3]:
endpoint_name = "fmri-pt-ipex-ov-sdk-v2-1"

In [4]:
# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name = endpoint_name, 
    description="this is online endpoint: fmri-pt-ipex-ov-sdk-v2",
    auth_mode="key"
)

poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
poller.wait()

In [None]:
# Configure a model

folder_data_model_path="../fmri-data-pt-onnx-ov-models"

model = Model(
    path=folder_data_model_path,
    type=AssetTypes.CUSTOM_MODEL,
    name="fmri-data-pt-onnx-ov-v2sdk",
    version="1",
    description="SDKv2-fmri-data-pt-onnx-ov-models with PT, ONNX and OV models of fMRI - final25D model. Also includes 100 IC_niftis test volumes (*.nii.gz)"
)
ml_client.models.create_or_update(model)


In [18]:
# Configure an environment
env = Environment(
    conda_file="conda_dep_opti.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    )

# configure an inference configuration with a scoring script
code_config = CodeConfiguration(
        code="fmri_score_code",
        scoring_script="score_opti-bench.py"
    )   

### Define Deployment
See VM SKUs that are supported for Azure Machine Learning managed online endpoints [here](https://learn.microsoft.com/en-us/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list?view=azureml-api-2)

In [19]:

req_settings = OnlineRequestSettings(request_timeout_ms=90000)  # 90000ms = 1.5min

# Define a deployment
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=model,
    environment=env,
    code_configuration=code_config,
    instance_type="Standard_FX4mds", #Standard_FX4mds, Standard_FX12mds, Standard_F2s_v2
    instance_count=1,
    request_settings=req_settings
)

# create the deployment:
poller = ml_client.begin_create_or_update(blue_deployment)
poller.wait()

Check: endpoint fmri-pt-ipex-ov-sdk-v2-1 exists


..........................................................................................................

In [25]:
# Set blue deployment to take 100% traffic
endpoint.traffic = {"blue": 100}
ml_client.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x7f296d97b550>

In [22]:
# Get Deployment logs if needed.
deployment_logs = ml_client.online_deployments.get_logs(
    name="blue", endpoint_name=endpoint_name, lines=50
)

In [34]:
# Get the details for online endpoint
deployed_endpoint = ml_client.online_endpoints.get(name=endpoint_name)

# existing traffic details
print(deployed_endpoint.traffic)

# Get the scoring URI
print(deployed_endpoint.scoring_uri)

auth_key = ml_client.online_endpoints.get_keys(endpoint_name).primary_key
print(f"Authkey:{auth_key[:10]}...")

{'blue': 100}
https://fmri-pt-ipex-ov-sdk-v2-1.eastus.inference.ml.azure.com/score
Authkey:XpnFG6LGTY...


In [None]:
import requests
import json

# resp = requests.post(scoring_uri, input_data, headers=headers)
scoring_uri = deployed_endpoint.scoring_uri

# Send HTTP request and obtain results from endpoint. 
# Note: in this example, the input data is already in the container uploaded along with the models during model registration.
response = requests.post(scoring_uri, headers={"Authorization": f"Bearer {auth_key}"}, timeout=600)
output_dict = json.loads(response.content)
print(json.dumps(output_dict, indent=4))

In [39]:
import json
output_dict = json.loads(response.content)

pt_metrics = output_dict['pt_summary']
ipex_metrics = output_dict['ipex_summary']
ov_metrics = output_dict['ov_summary']

print(f"PyTorch Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['PyTorch']}")
print(f"\tNum Subjects:\t{pt_metrics['num_subjects']}")
print(f"\tTest Accuracy:\t{pt_metrics['test_accuracy']}")
print(f"\tTime Taken:\t{pt_metrics['time_sec']:.4f} sec")


print(f"\nIPEX Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['IPEX']}")
print(f"\tNum Subjects:\t{ipex_metrics['num_subjects']}")
print(f"\tTest Accuracy:\t{ipex_metrics['test_accuracy']}")
print(f"\tTime Taken:\t{ipex_metrics['time_sec']:.4f} sec")

print(f"\nOpenVINO Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['OpenVINO']}")
print(f"\tNum Subjects:\t{ov_metrics['num_subjects']}")
print(f"\tTest Accuracy:\t{ov_metrics['test_accuracy']}")
print(f"\tTime Taken:\t{ov_metrics['time_sec']:.4f} sec")

# Calculate the speedup with IPEX compared to PyTorch
ipex_fps_speedup = pt_metrics['time_sec'] / ipex_metrics['time_sec']
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

# Calculate the speedup with OpenVINO compared to PyTorch
ov_fps_speedup = pt_metrics['time_sec'] / ov_metrics['time_sec']
print(f"\nSpeedup with OpenVINO: {ov_fps_speedup:.2f}x")

PyTorch Metrics:
	Framework Version:	1.13.1+cpu
	Num Subjects:	3600
	Test Accuracy:	99.47222222222223
	Time Taken:	21.9458 sec

IPEX Metrics:
	Framework Version:	1.13.100
	Num Subjects:	3600
	Test Accuracy:	99.47222222222223
	Time Taken:	13.6719 sec

OpenVINO Metrics:
	Framework Version:	2023.0.0-10926-b4452d56304-releases/2023/0
	Num Subjects:	3600
	Test Accuracy:	99.47222222222223
	Time Taken:	13.1644 sec

Speedup with IPEX: 1.61x

Speedup with OpenVINO: 1.67x


In [40]:
#Print System info
lscpu_out=output_dict['system_info']['lscpu_out'].encode().decode('unicode_escape')
print(f"\nSystem Info:\n{lscpu_out}")

mem_out_gb=output_dict['system_info']['mem_out_gb'].encode().decode('unicode_escape')
print(f"\nSystem Memory Info (GB):\n{mem_out_gb}")

os_out=output_dict['system_info']['os'].encode().decode('unicode_escape')
print(f"\nSystem OS:\n{os_out}")


System Info:
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   46 bits physical, 48 bits virtual
CPU(s):                          4
On-line CPU(s) list:             0-3
Thread(s) per core:              2
Core(s) per socket:              2
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           85
Model name:                      Intel(R) Xeon(R) Gold 6246R CPU @ 3.40GHz
Stepping:                        7
CPU MHz:                         3392.033
BogoMIPS:                        6784.06
Virtualization:                  VT-x
Hypervisor vendor:               Microsoft
Virtualization type:             full
L1d cache:                       64 KiB
L1i cache:                       64 KiB
L2 cache:                        2 MiB
L3 cache:               

#### Delete endpoint

In [None]:
#ml_client.online_endpoints.begin_delete(name=endpoint_name)