In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    KubernetesOnlineEndpoint,
    KubernetesOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)
from azure.identity import DefaultAzureCredential


In [2]:
# Details of the AML workspace.
# Each value may be overridden with an environment variable so the notebook
# can target another workspace without editing this cell; when the variable
# is unset, the original hard-coded value is used.
import os

subscription_id = os.getenv(
    "AZURE_SUBSCRIPTION_ID", "ea6d544d-2425-4667-8d38-51f050d9d69e"
)
resource_group = os.getenv("AZURE_RESOURCE_GROUP", "rg-vtpoc-9dev")
workspace = os.getenv("AZUREML_WORKSPACE_NAME", "mlw-vtpoc-9dev")


In [3]:
# Workspace handle used by every later cell; authentication goes through
# DefaultAzureCredential.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)


In [5]:
import datetime

# Endpoint name: the prefix "endpt-" followed by the current local time
# formatted as month, day, hour, minute and microseconds.
endpoint_name = "endpt-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# Endpoint definition; it is submitted by the begin_create_or_update call
# in the next cell.
endpoint = KubernetesOnlineEndpoint(
    name=endpoint_name,
    description="this is a sample online endpoint",
)
print(endpoint_name)

endpt-02282058598660


In [6]:
# Create (or update) the endpoint with local=True.
ml_client.online_endpoints.begin_create_or_update(
    endpoint=endpoint,
    local=True,
)

Creating local endpoint (endpt-02282058598660) .Done (0m 5s)


ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': None, 'scoring_uri': None, 'openapi_uri': None, 'name': 'endpt-02282058598660', 'description': 'this is a sample online endpoint', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/Users/amohajerani/.azureml/inferencing/endpt-02282058598660'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x119b46010>, 'auth_mode': 'key', 'location': None, 'identity': None, 'traffic': {}, 'mirror_traffic': {}, 'kind': None})

In [25]:
# Environment: a base image plus a conda specification file.
env = Environment(
    image="crvtpoc9dev.azurecr.io/local_deploy_img:v1",
    conda_file="../../data-science/environment/train-conda.yml",
)

# Model loaded from a local pickle file.
model = Model(
    path="./models/vt-model/model.pkl",
)

In [26]:
# "blue" deployment for the endpoint named by endpoint_name: one instance
# serving `model` inside `env`, scored by score.py from the scripts folder.
blue_deployment = KubernetesOnlineDeployment(
    endpoint_name=endpoint_name,
    name="blue",
    instance_count=1,
    environment=env,
    model=model,
    code_configuration=CodeConfiguration(
        scoring_script="score.py",
        code="../../mlops/scripts",
    ),
)

In [27]:

# Create (or update) the "blue" deployment with local=True.
ml_client.online_deployments.begin_create_or_update(blue_deployment, local=True)

Updating local deployment (endpt-02282058598660 / blue) .
Building Docker image from Dockerfile
Step 1/6 : FROM crvtpoc9dev.azurecr.io/local_deploy_img:v1
 ---> 847acc328956
Step 2/6 : RUN mkdir -p /var/azureml-app/
 ---> Using cache
 ---> b62640db02bf
Step 3/6 : WORKDIR /var/azureml-app/
 ---> Using cache
 ---> f364f9adf4cd
Step 4/6 : COPY conda.yml /var/azureml-app/
 ---> Using cache
 ---> 78d12a42a069
Step 5/6 : RUN conda env create -n inf-conda-env --file conda.yml
 ---> Using cache
 ---> 59fbb4e596fe
Step 6/6 : CMD ["conda", "run", "--no-capture-output", "-n", "inf-conda-env", "runsvdir", "/var/runit"]
 ---> Using cache
 ---> c0ab499e1a36
Successfully built c0ab499e1a36
Successfully tagged endpt-02282058598660:blue

Starting up endpoint.....Done (0m 30s)


KubernetesOnlineDeployment({'provisioning_state': 'Succeeded', 'endpoint_name': 'endpt-02282058598660', 'type': 'Kubernetes', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/Users/amohajerani/Desktop/code/visit-t-poc/local_deploy'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x11c8787d0>, 'model': Model({'job_name': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': '2fbc54b506f55723f0d238d0ea428147', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/Users/amohajerani/Desktop/code/visit-t-poc/local_deploy'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x11c8b2b50>, 'version': '1', 'latest_version': None, 'path': '/Users/amohajerani/De

In [31]:
# Fetch the local endpoint to inspect its state and scoring URI.
ml_client.online_endpoints.get(
    local=True,
    name=endpoint_name,
)


ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': 'Succeeded', 'scoring_uri': 'http://localhost:32781/score', 'openapi_uri': None, 'name': 'endpt-02282058598660', 'description': 'this is a sample online endpoint', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': PosixPath('/Users/amohajerani/Desktop/code/visit-t-poc/local_deploy'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x11c90aed0>, 'auth_mode': 'key', 'location': 'local', 'identity': None, 'traffic': {}, 'mirror_traffic': {}, 'kind': None})

In [34]:
# Request 50 lines of log output from the local "blue" deployment.
ml_client.online_deployments.get_logs(
    endpoint_name=endpoint_name,
    name="blue",
    lines=50,
    local=True,
)

'2024-02-29T02:49:53,250907637+00:00 | gunicorn/run | \r\n2024-02-29T02:49:53,281944137+00:00 | gunicorn/run | ###############################################\r\n2024-02-29T02:49:53,314129679+00:00 | gunicorn/run | AzureML Inference Server\r\n2024-02-29T02:49:53,350316637+00:00 | gunicorn/run | ###############################################\r\n2024-02-29T02:49:53,381935429+00:00 | gunicorn/run | \r\n2024-02-29T02:49:59,946311834+00:00 | gunicorn/run | Starting AzureML Inference Server HTTP.\r\n2024-02-29 02:50:01,002 I [59] azmlinfsrv - Loaded logging config from /opt/miniconda/envs/inf-conda-env/lib/python3.10/site-packages/azureml_inference_server_http/logging.json\r\n\r\nAzure ML Inferencing HTTP server v1.0.0\r\n\r\n\r\nServer Settings\r\n---------------\r\nEntry Script Name: /var/azureml-app/scripts/score.py\r\nModel Directory: /var/azureml-app/azureml-models//2fbc54b506f55723f0d238d0ea428147/1\r\nConfig File: None\r\nWorker Count: 1\r\nWorker Timeout (seconds): 300\r\nServer Por

In [33]:
# Send the sample request file to the local endpoint.
ml_client.online_endpoints.invoke(
    local=True,
    request_file="../../data/test-request.json",
    endpoint_name=endpoint_name,
)

'[30.32259750366211, 29.90568733215332]'

In [37]:
# Cloud deployment starts here. TODO: delete the remaining code later on.
import datetime

# The current datetime is embedded in the endpoint name to avoid conflicts.
online_endpoint_name = f"k8s-endpoint-{datetime.datetime.now():%m%d%H%M%f}"

# Online endpoint definition targeting the "aks-dev" compute, with key auth.
endpoint = KubernetesOnlineEndpoint(
    compute="aks-dev",
    name=online_endpoint_name,
    auth_mode="key",
    description="this is a sample online endpoint",
    tags={"foo": "bar"},
)

In [38]:
# Submit the endpoint, then ask the returned object for its result.
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.result()

KubernetesOnlineEndpoint({'provisioning_state': 'Succeeded', 'scoring_uri': 'http://4.149.66.177/api/v1/endpoint/k8s-endpoint-02282157694833/score', 'openapi_uri': 'http://4.149.66.177/api/v1/endpoint/k8s-endpoint-02282157694833/swagger.json', 'name': 'k8s-endpoint-02282157694833', 'description': 'this is a sample online endpoint', 'tags': {'foo': 'bar'}, 'properties': {'azureml.onlineendpointid': '/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/resourcegroups/rg-vtpoc-9dev/providers/microsoft.machinelearningservices/workspaces/mlw-vtpoc-9dev/onlineendpoints/k8s-endpoint-02282157694833', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oe:82335111-e642-4809-92c0-ac1e65471504:39a1ec80-aa93-468f-aa2a-68914faa422d?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/resourceGroups/rg-vt

In [39]:
# Both resource-settings classes are imported from the public
# azure.ai.ml.entities package instead of its private `_deployment`
# submodules, which are not part of the supported API surface.
from azure.ai.ml.entities import (
    ResourceRequirementsSettings,
    ResourceSettings,
)


# "blue" deployment for the endpoint named by online_endpoint_name. It uses
# the same model, environment and scoring script as the earlier local
# deployment, and adds explicit CPU and memory requests.
blue_deployment = KubernetesOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    environment=env,
    code_configuration=CodeConfiguration(
        code="../../mlops/scripts", scoring_script="score.py"
    ),
    instance_count=1,
    resources=ResourceRequirementsSettings(
        requests=ResourceSettings(
            cpu="100m",
            memory="0.5Gi",
        ),
    ),
)

In [40]:
# Submit the "blue" deployment, then ask the returned object for its result.
deployment_poller = ml_client.begin_create_or_update(blue_deployment)
deployment_poller.result()

Check: endpoint k8s-endpoint-02282157694833 exists
[32mUploading scripts (0.03 MBs): 100%|██████████| 31152/31152 [00:00<00:00, 95593.53it/s]
[39m

[32mUploading model.pkl[32m (< 1 MB): 100%|██████████| 190k/190k [00:00<00:00, 831kB/s]
[39m



.................................................................................................................................................................

KubernetesOnlineDeployment({'provisioning_state': 'Succeeded', 'endpoint_name': 'k8s-endpoint-02282157694833', 'type': 'Kubernetes', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/od:82335111-e642-4809-92c0-ac1e65471504:51d94e60-ba41-47b6-9942-0bd6cf7be030?api-version=2023-04-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/resourceGroups/rg-vtpoc-9dev/providers/Microsoft.MachineLearningServices/workspaces/mlw-vtpoc-9dev/onlineEndpoints/k8s-endpoint-02282157694833/deployments/blue', 'Resource__source_path': None, 'base_path': '/Users/amohajerani/Desktop/code/visit-t-poc/local_deploy', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x11cfc8410>, 'model': '/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69

In [41]:
# Route 100% of the endpoint's traffic to the "blue" deployment, then
# submit the updated endpoint and ask for the result.
endpoint.traffic = dict(blue=100)
traffic_poller = ml_client.begin_create_or_update(endpoint)
traffic_poller.result()

KubernetesOnlineEndpoint({'provisioning_state': 'Succeeded', 'scoring_uri': 'http://4.149.66.177/api/v1/endpoint/k8s-endpoint-02282157694833/score', 'openapi_uri': 'http://4.149.66.177/api/v1/endpoint/k8s-endpoint-02282157694833/swagger.json', 'name': 'k8s-endpoint-02282157694833', 'description': 'this is a sample online endpoint', 'tags': {'foo': 'bar'}, 'properties': {'azureml.onlineendpointid': '/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/resourcegroups/rg-vtpoc-9dev/providers/microsoft.machinelearningservices/workspaces/mlw-vtpoc-9dev/onlineendpoints/k8s-endpoint-02282157694833', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oe:82335111-e642-4809-92c0-ac1e65471504:ba9e0bd4-89cb-4425-9e0a-a9e29d6d448d?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/ea6d544d-2425-4667-8d38-51f050d9d69e/resourceGroups/rg-vt

In [44]:


# Send the sample request file to the "blue" deployment of the cloud endpoint.
ml_client.online_endpoints.invoke(
    request_file="../../data/test-request.json",
    deployment_name="blue",
    endpoint_name=online_endpoint_name,
)

'[30.32259750366211, 29.90568733215332]'