In [None]:
# ! pip install azure-ai-ml

In [56]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    OnlineRequestSettings
)
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

In [57]:
# enter details of your AML workspace
subscription_id = "<SUBSCRIPTION_ID>" 
resource_group = "<RESOURCE_GROUP>" 
workspace = "<AML_WORKSPACE_NAME>" 

# get a handle to the workspace
ml_client = MLClient(
    DefaultAzureCredential(), subscription_id, resource_group, workspace
)

In [58]:
# configure an environment
env = Environment(
    conda_file="conda_fmri.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    )

# configure an inference configuration with a scoring script
code_config = CodeConfiguration(
        code="fmri_score_code",
        scoring_script="score_opti-bench.py"
    )   
    #score_opti-v2.py

In [59]:
folder_data_model_path="./fmri-uploads"

local_model = Model(
    path=folder_data_model_path,
    type=AssetTypes.CUSTOM_MODEL,
    name="fmri-uploads",
    version="1",
    description="SDKv2-fmri-data-pt-onnx-ov-models with PT, ONNX and OV models of fMRI - final25D model. Also includes 100 IC_niftis test volumes (*.nii.gz)"
)

In [60]:
endpoint_name = "fmri-pt-ipex-ov-12c-sdk-v2"
# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name = endpoint_name, 
    description="this is local: fmri-pt-ipex-ov-local-sdk-v2",
    auth_mode="key"
)

poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
#poller.wait()

In [None]:

req_settings = OnlineRequestSettings(request_timeout_ms=90000)

# Define a deployment
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=endpoint_name,
    model=local_model,
    environment=env,
    code_configuration=code_config,
    instance_type="Standard_FX12mds", #Standard_FX4mds/12/24, Standard_F2s_v2
    instance_count=1,
    request_settings=req_settings
)


# create the deployment:
ml_client.begin_create_or_update(blue_deployment)



In [62]:
# blue deployment takes 100% traffic
endpoint.traffic = {"blue": 100}
ml_client.begin_create_or_update(endpoint)

<azure.core.polling._poller.LROPoller at 0x7f1754cb3370>

In [None]:
ml_client.online_endpoints.get(name=endpoint_name)

In [None]:
ml_client.online_deployments.get_logs(
    name="blue", endpoint_name=endpoint_name, lines=500
)

In [65]:
# Get the details for online endpoint
deployed_endpoint = ml_client.online_endpoints.get(name=endpoint_name)

# existing traffic details
print(deployed_endpoint.traffic)

# Get the scoring URI
print(deployed_endpoint.scoring_uri)

auth_key = ml_client.online_endpoints.get_keys(endpoint_name).primary_key
print(f"Authkey:{auth_key[:10]}...")

{'blue': 100}
https://fmri-pt-ipex-ov-12c-sdk-v2.eastus.inference.ml.azure.com/score
Authkey:hYnI2oYY62...


In [66]:
import requests
import json

# resp = requests.post(scoring_uri, input_data, headers=headers)
scoring_uri = deployed_endpoint.scoring_uri

# Send HTTP request and obtain results from endpoint. 
# Note: in this example, the input data is already in the container uploaded along with the models during model registration.
response = requests.post(scoring_uri, headers={"Authorization": f"Bearer {auth_key}"}, timeout=90000)
#print(response)
output_dict = json.loads(response.content)
print(json.dumps(output_dict, indent=4))

{
    "pt_summary": {
        "fwk_version": "PyTorch: 1.13.1+cpu",
        "num_subjects": 3600,
        "test_accuracy": 99.47222222222223,
        "time_sec": 0.01954956759106029
    },
    "torch_summary": null,
    "ipex_summary": {
        "fwk_version": "IPEX: 1.13.100",
        "num_subjects": 3600,
        "test_accuracy": 99.47222222222223,
        "time_sec": 0.010153221000324597
    },
    "ipex_eager_summary": null,
    "ov_summary": {
        "fwk_version": "OpenVINO: 2023.0.0-10926-b4452d56304-releases/2023/0",
        "num_subjects": 3600,
        "test_accuracy": 99.47222222222223,
        "time_sec": 0.011029672080820258
    },
    "system_info": {
        "lscpu_out": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          12\nOn-line CPU(s) list:             0-11\nThread(s) per core:  

In [68]:
import json
output_dict = json.loads(response.content)

pt_metrics = output_dict['pt_summary']
ipex_metrics_graph = output_dict['ipex_summary']
ov_metrics = output_dict['ov_summary']

print(f"PyTorch Eager Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['PyTorch']}")
print(f"\tNum Subjects:\t{pt_metrics['num_subjects']}")
print(f"\tTest Accuracy:\t{pt_metrics['test_accuracy']}")
print(f"\tTime Taken:\t{pt_metrics['time_sec']:.4f} sec")

print(f"\nIPEX Graph Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['IPEX']}")
print(f"\tNum Subjects:\t{ipex_metrics_graph['num_subjects']}")
print(f"\tTest Accuracy:\t{ipex_metrics_graph['test_accuracy']}")
print(f"\tTime Taken:\t{ipex_metrics_graph['time_sec']:.4f} sec")

print(f"\nOpenVINO Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['OpenVINO']}")
print(f"\tNum Subjects:\t{ov_metrics['num_subjects']}")
print(f"\tTest Accuracy:\t{ov_metrics['test_accuracy']}")
print(f"\tTime Taken:\t{ov_metrics['time_sec']:.4f} sec")

# Calculate the speedup with IPEX compared to PyTorch
ipex_fps_speedup = pt_metrics['time_sec'] / ipex_metrics_graph['time_sec']
print(f"\nSpeedup with IPEX: {ipex_fps_speedup:.2f}x")

# Calculate the speedup with OpenVINO compared to PyTorch
ov_fps_speedup = pt_metrics['time_sec'] / ov_metrics['time_sec']
print(f"\nSpeedup with OpenVINO: {ov_fps_speedup:.2f}x")

'''
Pytorch Graph mode and IPEX Eager mode benchmarking

pt_metrics_graph = output_dict['torch_summary']
ipex_metrics = output_dict['ipex_eager_summary']

print(f"PyTorch Graph Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['PyTorch']}")
print(f"\tNum Subjects:\t{pt_metrics_graph['num_subjects']}")
print(f"\tTest Accuracy:\t{pt_metrics_graph['test_accuracy']}")
print(f"\tTime Taken:\t{pt_metrics_graph['time_sec']:.4f} sec")


print(f"\nIPEX Eager Metrics:")
print(f"\tFramework Version:\t{output_dict['system_info']['fwk_versions']['IPEX']}")
print(f"\tNum Subjects:\t{ipex_metrics['num_subjects']}")
print(f"\tTest Accuracy:\t{ipex_metrics['test_accuracy']}")
print(f"\tTime Taken:\t{ipex_metrics['time_sec']:.4f} sec")
'''


PyTorch Eager Metrics:
	Framework Version:	1.13.1+cpu
	Num Subjects:	3600
	Test Accuracy:	99.47222222222223
	Time Taken:	0.0195 sec

IPEX Graph Metrics:
	Framework Version:	1.13.100
	Num Subjects:	3600
	Test Accuracy:	99.47222222222223
	Time Taken:	0.0102 sec

OpenVINO Metrics:
	Framework Version:	2023.0.0-10926-b4452d56304-releases/2023/0
	Num Subjects:	3600
	Test Accuracy:	99.47222222222223
	Time Taken:	0.0110 sec

Speedup with IPEX: 1.93x

Speedup with OpenVINO: 1.77x


In [69]:
#Print System info
lscpu_out=output_dict['system_info']['lscpu_out'].encode().decode('unicode_escape')
print(f"\nSystem Info:\n{lscpu_out}")

mem_out_gb=output_dict['system_info']['mem_out_gb'].encode().decode('unicode_escape')
print(f"\nSystem Memory Info (GB):\n{mem_out_gb}")

os_out=output_dict['system_info']['os'].encode().decode('unicode_escape')
print(f"\nSystem OS:\n{os_out}")


System Info:
Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   46 bits physical, 48 bits virtual
CPU(s):                          12
On-line CPU(s) list:             0-11
Thread(s) per core:              2
Core(s) per socket:              6
Socket(s):                       1
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           85
Model name:                      Intel(R) Xeon(R) Gold 6246R CPU @ 3.40GHz
Stepping:                        7
CPU MHz:                         3392.029
BogoMIPS:                        6784.05
Virtualization:                  VT-x
Hypervisor vendor:               Microsoft
Virtualization type:             full
L1d cache:                       192 KiB
L1i cache:                       192 KiB
L2 cache:                        6 MiB
L3 cache:           

In [None]:
ml_client.online_endpoints.begin_delete(name=endpoint_name)