# 1. Connect to Azure Machine Learning

### 1.2 Set workspace details

In [1]:
# enter details of your AML workspace
SUBSCRIPTION_ID = "subid"
RESOURCE_GROUP = "rg"
AML_WORKSPACE_NAME = "workspace"
AZURE_AI_SERVICES_NAME = "<AZURE_AI_SERVICES_NAME>"

### 1.3 Login to your Azure account

In [2]:
# Authenticate clients
from azure.identity import (
    DefaultAzureCredential,
    InteractiveBrowserCredential,
    AzureCliCredential,
)
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    AzureMLOnlineInferencingServer,
    ModelPackage,
    CodeConfiguration,
    BaseEnvironment,
    ModelConfiguration,
)
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Environment,
    Model,
)

try:
    credential = DefaultAzureCredential(additionally_allowed_tenants=["*"])
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    credential = InteractiveBrowserCredential(additionally_allowed_tenants=["*"])

# If login doesn't work above, uncomment the code below and login using device code
# !az login --use-device-code

# 2. Managed Online Endpoint

In [7]:
# create a endpoint
import datetime

from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
)

from azure.ai.ml import (
    MLClient,
)

from azure.ai.ml.entities import (
    ManagedOnlineDeployment,
    OnlineRequestSettings,
    Model,
    Environment,
)

In [8]:
time = str(datetime.datetime.now().strftime("%m%d%H%M%f"))

In [9]:
online_endpoint_name = f"aml-llm-lc-demo-{time}"


# get a handle to the workspace
ml_client = MLClient(credential, SUBSCRIPTION_ID, RESOURCE_GROUP, AML_WORKSPACE_NAME)

In [6]:
# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="online endpoint for streaming example",
    auth_mode="key",
)

endpoint = ml_client.begin_create_or_update(endpoint).result()

print(endpoint)

auth_mode: key
description: online endpoint for streaming example
id: /subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/resourceGroups/sc1-ml1/providers/Microsoft.MachineLearningServices/workspaces/sc1ml1/onlineEndpoints/aml-llm-lc-demo-09252220167763
identity:
  principal_id: 6ad1fbd2-06d6-4f9d-8a3b-dccce08fbdef
  tenant_id: 16b3c013-d300-468d-ac64-7eda0820b6d3
  type: system_assigned
kind: Managed
location: southcentralus
mirror_traffic: {}
name: aml-llm-lc-demo-09252220167763
openapi_uri: https://aml-llm-lc-demo-09252220167763.southcentralus.inference.ml.azure.com/swagger.json
properties:
  AzureAsyncOperationUri: https://management.azure.com/subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/providers/Microsoft.MachineLearningServices/locations/southcentralus/mfeOperationsStatus/oeidp:a2f597f4-1a31-4e25-9c39-aa7e2d3b6df0:44af2992-ed84-46d8-8c44-c9f0dd0b5963?api-version=2022-02-01-preview
  azureml.onlineendpointid: /subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/resourcegroup

In [4]:
# assign the Cognitive Services User role to the endpoint
endpoint_principal_id = endpoint.identity.principal_id
!az role assignment create --assignee-principal-type ServicePrincipal --assignee-object-id {endpoint_principal_id} --role "Cognitive Services User" --scope /subscriptions/{SUBSCRIPTION_ID}/resourceGroups/{RESOURCE_GROUP}/providers/Microsoft.CognitiveServices/accounts/{AZURE_AI_SERVICES_NAME}

/bin/bash: -c: line 0: syntax error near unexpected token `newline'
/bin/bash: -c: line 0: `az role assignment create --assignee-principal-type ServicePrincipal --assignee-object-id b06d0fcb-bf08-4327-bdea-147c6a27fcda --role "Cognitive Services User" --scope /subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/resourceGroups/sc1-ml1/providers/Microsoft.CognitiveServices/accounts/<AZURE_AI_SERVICES_NAME>'


### Register Model

In [11]:
#Register model to workspace model registry
model=Model(name="stream_demo",path="./src")
ml_client.models.create_or_update(model)

reg_model = ml_client.models.get("stream_demo", label="latest")

[32mUploading src (0.0 MBs): 100%|██████████| 1635/1635 [00:00<00:00, 15248.32it/s]
[39m



# 3. Deploy to Endpoint

In [12]:
reg_env = Environment(
    image="FROM mcr.microsoft.com/azureml/inference-base-2204:20240916.v1",
    conda_file="./env/conda_dependencies.yaml",
    name="stream_env_example",
    description="Environment created from a Docker image plus Conda environment.",
)
ml_client.environments.create_or_update(reg_env)

Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': 'mcr.microsoft.com/azureml/inference-base-2204', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'stream_env_example', 'description': 'Environment created from a Docker image plus Conda environment.', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/resourceGroups/sc1-ml1/providers/Microsoft.MachineLearningServices/workspaces/sc1ml1/environments/stream_env_example/versions/1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jacwang1/code/Users/jacwang/sample_streaming_aml_inference', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7fcb14b3ec50>, 'serialize': <msrest.serialization.Serializer object at 0x7fcb14b3dc90>, 'version': '1', 'conda_file': {'channels': ['conda-forg

In [13]:
reg_env = ml_client.environments.get("stream_env_example", label="latest")

In [14]:
reg_model =  ml_client.models.get("stream_demo", label="latest")

In [16]:
deployment_name = f"deploy-{time}-4"
lc_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    environment=reg_env,
    model=reg_model,
    code_configuration=CodeConfiguration(code="src", scoring_script="score.py"),
    request_settings=OnlineRequestSettings(request_timeout_ms=60000),
    endpoint_name= online_endpoint_name,
    instance_type="Standard_F2s_v2",
    instance_count=1,
)
ml_client.online_deployments.begin_create_or_update(lc_deployment).result()

endpoint.traffic = {deployment_name: 100}
ml_client.begin_create_or_update(endpoint).result()

Check: endpoint aml-llm-lc-demo-09252220167763 exists
[32mUploading src (0.0 MBs): 100%|██████████| 1635/1635 [00:00<00:00, 46802.48it/s]
[39m



..........................................................................

NameError: name 'endpoint' is not defined

# 4. Test
Now endpoint has been deployed, let's test it.

In [20]:
import urllib.request
import json
import os
import ssl
import requests

def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

data = {}

body = str.encode(json.dumps(data))

url = 'https://endpoint/score'
# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
api_key = 'key'
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

with requests.get(url=url, headers=headers, stream=True) as r:
    for chunk in r.iter_lines():  # or, for line in r.iter_lines():
        print(chunk, end='')

b'start:'b'Lorem,'b'ipsum,'b'dolor,'b'sit,'b'amet,,'b'consectetur,'b'adipiscing,'b'elit.,'b'Sed,'b'do,'b'eiusmod,'b'tempor,'b'incididunt,'b'ut,'b'labore,'b'et,'b'dolore,'b'magna,'b'aliqua.,'b'Ut,'b'enim,'b'ad,'b'minim,'b'veniam,,'b'quis,'b'nostrud,'b'exercitation,'b'ullamco,'b'laboris,'b'nisi,'b'ut,'b'aliquip,'b'ex,'b'ea,'b'commodo,'b'consequat.,'

# 5. Clean up resources

In [21]:
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

<azure.core.polling._poller.LROPoller at 0x7fcaeeb04b20>