# 1. Connect to Azure Machine Learning

### 1.2 Set workspace details

In [21]:
# enter details of your AML workspace
SUBSCRIPTION_ID = "sub"
RESOURCE_GROUP = "rg"
AML_WORKSPACE_NAME = "workspace"
AZURE_AI_SERVICES_NAME = "<AZURE_AI_SERVICES_NAME>"

### 1.3 Login to your Azure account

In [22]:
# Authenticate clients
from azure.identity import (
    DefaultAzureCredential,
    InteractiveBrowserCredential,
    AzureCliCredential,
)

try:
    credential = DefaultAzureCredential(additionally_allowed_tenants=["*"])
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
    credential = InteractiveBrowserCredential(additionally_allowed_tenants=["*"])

# If login doesn't work above, uncomment the code below and login using device code
# !az login --use-device-code

# 2. Managed Online Endpoint

In [36]:
# create a endpoint
import datetime

from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
)

from azure.ai.ml import (
    MLClient,
)

from azure.ai.ml.entities import (
    ManagedOnlineDeployment,
    OnlineRequestSettings,
    Model,
    Environment,
)

In [25]:
time = str(datetime.datetime.now().strftime("%m%d%H%M%f"))

In [26]:
online_endpoint_name = f"aml-llm-lc-demo-{time}"


# get a handle to the workspace
ml_client = MLClient(credential, SUBSCRIPTION_ID, RESOURCE_GROUP, AML_WORKSPACE_NAME)

In [None]:
# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="online endpoint for streaming example",
    auth_mode="key",
)

endpoint = ml_client.begin_create_or_update(endpoint).result()

print(endpoint)

In [4]:
# assign the Cognitive Services User role to the endpoint
endpoint_principal_id = endpoint.identity.principal_id
!az role assignment create --assignee-principal-type ServicePrincipal --assignee-object-id {endpoint_principal_id} --role "Cognitive Services User" --scope /subscriptions/{SUBSCRIPTION_ID}/resourceGroups/{RESOURCE_GROUP}/providers/Microsoft.CognitiveServices/accounts/{AZURE_AI_SERVICES_NAME}

/bin/bash: -c: line 0: syntax error near unexpected token `newline'
/bin/bash: -c: line 0: `az role assignment create --assignee-principal-type ServicePrincipal --assignee-object-id b06d0fcb-bf08-4327-bdea-147c6a27fcda --role "Cognitive Services User" --scope /subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/resourceGroups/sc1-ml1/providers/Microsoft.CognitiveServices/accounts/<AZURE_AI_SERVICES_NAME>'


### Register Model

In [27]:
#Register model to workspace model registry
model=Model(name="stream_demo",path="../src/stream_model")
ml_client.models.create_or_update(model)

reg_model = ml_client.models.get("stream_demo", label="latest")

[32mUploading stream_model (0.31 MBs):   0%|          | 0/310854 [00:00<?, ?it/s][32mUploading stream_model (0.31 MBs):   0%|          | 373/310854 [00:00<01:36, 3220.70it/s][32mUploading stream_model (0.31 MBs): 100%|██████████| 310854/310854 [00:00<00:00, 2149858.65it/s]
[39m



# 3. Deploy to Endpoint

In [38]:
reg_env = ml_client.environments.create_or_update(Environment(name='stream_demo_env', image='mcr.microsoft.com/azureml/curated/minimal-app-quickstart:7'))

In [40]:
deployment_name = f"deploy-{time}-4"
lc_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    environment=reg_env,
    model=reg_model,
    request_settings=OnlineRequestSettings(request_timeout_ms=60000),
    endpoint_name=online_endpoint_name,
    instance_type="Standard_F2s_v2",
    instance_count=1,
)
ml_client.online_deployments.begin_create_or_update(lc_deployment).result()

endpoint.traffic = {deployment_name: 100}
ml_client.begin_create_or_update(endpoint).result()

Check: endpoint basic-chat-endpoint-sdk exists
Readonly attribute principal_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>
Readonly attribute tenant_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>


............................................

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://basic-chat-endpoint-sdk.southcentralus.inference.ml.azure.com/', 'openapi_uri': 'https://basic-chat-endpoint-sdk.southcentralus.inference.ml.azure.com/swagger.json', 'name': 'basic-chat-endpoint-sdk', 'description': 'this is a sample endpoint', 'tags': {}, 'properties': {'createdBy': 'Jake Wang', 'createdAt': '2024-09-06T22:00:39.135351+0000', 'lastModifiedAt': '2024-09-06T22:00:39.135351+0000', 'azureml.onlineendpointid': '/subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/resourcegroups/sc1-ml1/providers/microsoft.machinelearningservices/workspaces/sc1ml1/onlineendpoints/basic-chat-endpoint-sdk', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/6025ba02-1dfd-407f-b358-88f811c7c7aa/providers/Microsoft.MachineLearningServices/locations/southcentralus/mfeOperationsStatus/oeidp:a2f597f4-1a31-4e25-9c39-aa7e2d3b6df0:c5bec0d9-fcda-401d-828e-a2693d7b0b01

# 4. Test
Now endpoint has been deployed, let's test it.

In [41]:
import urllib.request
import json
import os
import ssl
import requests

def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

data = {}

body = str.encode(json.dumps(data))

url = '<endpoint>/stream'
# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
api_key = '<key>'
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

with requests.get(url=url, headers=headers, stream=True) as r:
    for chunk in r.iter_content(5):  # or, for line in r.iter_lines():
        print(chunk)

b'Lorem'
b'ipsum'
b'dolor'
b'sit'
b'amet,'
b'conse'
b'ctetu'
b'r'
b'adipi'
b'scing'
b'elit.'
b'Sed'
b'do'
b'eiusm'
b'od'
b'tempo'
b'r'
b'incid'
b'idunt'
b'ut'
b'labor'
b'e'
b'et'
b'dolor'
b'e'
b'magna'
b'aliqu'
b'a.'
b'Ut'
b'enim'
b'ad'
b'minim'
b'venia'
b'm,'
b'quis'
b'nostr'
b'ud'
b'exerc'
b'itati'
b'on'
b'ullam'
b'co'
b'labor'
b'is'
b'nisi'
b'ut'
b'aliqu'
b'ip'
b'ex'
b'ea'
b'commo'
b'do'
b'conse'
b'quat.'
b'Duis'
b'aute'
b'irure'
b'dolor'
b'in'
b'repre'
b'hende'
b'rit'
b'in'
b'volup'
b'tate'
b'velit'
b'esse'
b'cillu'
b'm'
b'dolor'
b'e'
b'eu'
b'fugia'
b't'
b'nulla'
b'paria'
b'tur.'
b'Excep'
b'teur'
b'sint'
b'occae'
b'cat'
b'cupid'
b'atat'
b'non'
b'proid'
b'ent,'
b'sunt'
b'in'
b'culpa'
b'qui'
b'offic'
b'ia'
b'deser'
b'unt'
b'molli'
b't'
b'anim'
b'id'
b'est'
b'labor'
b'um.'


In [9]:
with requests.get(url=url, headers=headers, stream=True) as r:
    for chunk in r.iter_content(5):  # or, for line in r.iter_lines():
        print(chunk)

b'Lorem'
b'ipsum'
b'dolor'
b'sit'
b'amet,'
b'conse'
b'ctetu'
b'r'
b'adipi'
b'scing'
b'elit.'
b'Sed'
b'do'
b'eiusm'
b'od'
b'tempo'
b'r'
b'incid'
b'idunt'
b'ut'
b'labor'
b'e'
b'et'
b'dolor'
b'e'
b'magna'
b'aliqu'
b'a.'
b'Ut'
b'enim'
b'ad'
b'minim'
b'venia'
b'm,'
b'quis'
b'nostr'
b'ud'
b'exerc'
b'itati'
b'on'
b'ullam'
b'co'
b'labor'
b'is'
b'nisi'
b'ut'
b'aliqu'
b'ip'
b'ex'
b'ea'
b'commo'
b'do'
b'conse'
b'quat.'
b'Duis'
b'aute'
b'irure'
b'dolor'
b'in'
b'repre'
b'hende'
b'rit'
b'in'
b'volup'
b'tate'
b'velit'
b'esse'
b'cillu'
b'm'
b'dolor'
b'e'
b'eu'
b'fugia'
b't'
b'nulla'
b'paria'
b'tur.'
b'Excep'
b'teur'
b'sint'
b'occae'
b'cat'
b'cupid'
b'atat'
b'non'
b'proid'
b'ent,'
b'sunt'
b'in'
b'culpa'
b'qui'
b'offic'
b'ia'
b'deser'
b'unt'
b'molli'
b't'
b'anim'
b'id'
b'est'
b'labor'
b'um.'


# 5. Clean up resources

In [10]:
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

<azure.core.polling._poller.LROPoller at 0x7f0c103305b0>

...........