In [1]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)
import os
from pathlib import Path
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential

In [2]:
load_dotenv

<function dotenv.main.load_dotenv(dotenv_path: Union[str, ForwardRef('os.PathLike[str]'), NoneType] = None, stream: Optional[IO[str]] = None, verbose: bool = False, override: bool = False, interpolate: bool = True, encoding: Optional[str] = 'utf-8') -> bool>

In [8]:
AZURE_TENANT_ID = os.environ.get("AZURE_TENANT_ID")
AZURE_SUBSCRIPTION_ID = os.environ.get("AZURE_SUBSCRIPTION_ID")
AZURE_CLIENT_ID = os.environ.get("AZURE_CLIENT_ID")
AZURE_CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET")
AZURE_ML_RESOURCE_GROUP = os.environ.get("AZURE_ML_RESOURCE_GROUP")
AZURE_ML_WORKSPACE = os.environ.get("AZURE_ML_WORKSPACE")

PROJECT_PATH = os.environ.get("PROJECT_PATH")
BASE_PATH = os.path.join(PROJECT_PATH, os.environ.get("BASE_PATH"))
AML_MODEL_NAME = os.environ.get("AML_MODEL_NAME")
ENDPOINT_MODEL_NAME = os.environ.get("ENDPOINT_MODEL_NAME")
MODEL_BASE_PATH = os.environ.get("MODEL_BASE_PATH")

print(BASE_PATH)
# endpoint_path = os.path.join(PROJECT_PATH, BASE_PATH)
model_path = os.path.join(MODEL_BASE_PATH, AML_MODEL_NAME, "1")
model_path

/home/brlamore/src/mlflow_server/online_endpoint


'/var/azureml-app/azureml-models/tfserving-mounted/1'

In [19]:
# !mkdir -p {endpoint_path}

In [4]:
# Download a TensorFlow model
!wget https://aka.ms/half_plus_two-model -O {BASE_PATH}/half_plus_two.tar.gz


--2024-07-22 19:24:47--  https://aka.ms/half_plus_two-model
Resolving aka.ms (aka.ms)... 96.17.65.182
Connecting to aka.ms (aka.ms)|96.17.65.182|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://azuremlexamples.blob.core.windows.net/models/half_plus_two.tar.gz [following]
--2024-07-22 19:24:48--  https://azuremlexamples.blob.core.windows.net/models/half_plus_two.tar.gz
Resolving azuremlexamples.blob.core.windows.net (azuremlexamples.blob.core.windows.net)... 20.60.128.132
Connecting to azuremlexamples.blob.core.windows.net (azuremlexamples.blob.core.windows.net)|20.60.128.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7554 (7.4K) [application/x-gzip]
Saving to: ‘/home/brlamore/src/mlflow_server/online_endpoint/half_plus_two.tar.gz’


2024-07-22 19:24:48 (1.62 GB/s) - ‘/home/brlamore/src/mlflow_server/online_endpoint/half_plus_two.tar.gz’ saved [7554/7554]



In [5]:
!tar -xvf {BASE_PATH}/half_plus_two.tar.gz -C {BASE_PATH}

half_plus_two/
half_plus_two/00000123/
half_plus_two/00000123/saved_model.pb
half_plus_two/00000123/assets/
half_plus_two/00000123/assets/license.txt
half_plus_two/00000123/assets/foo.txt
half_plus_two/00000123/variables/
half_plus_two/00000123/variables/variables.index
half_plus_two/00000123/variables/variables.data-00000-of-00001


In [30]:
print(f"docker run --rm -d -v {BASE_PATH}:{model_path} -p 8501:8501 -e MODEL_BASE_PATH={model_path} -e MODEL_NAME={ENDPOINT_MODEL_NAME}  --name=\"tfserving-test\" docker.io/tensorflow/serving:latest sleep 10")

docker run --rm -d -v /home/brlamore/src/mlflow_server/online_endpoint:/var/azureml-app/azureml-models/tfserving-mounted/1 -p 8501:8501 -e MODEL_BASE_PATH=/var/azureml-app/azureml-models/tfserving-mounted/1 -e MODEL_NAME=half_plus_two  --name="tfserving-test" docker.io/tensorflow/serving:latest sleep 10


In [37]:
# Check liveness
!curl -v "http://localhost:8501/v1/models/{ENDPOINT_MODEL_NAME}"

*   Trying 127.0.0.1:8501...
* Connected to localhost (127.0.0.1) port 8501 (#0)
> GET /v1/models/half_plus_two HTTP/1.1
> Host: localhost:8501
> User-Agent: curl/7.81.0
> Accept: */*
> 
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< Content-Type: application/json
< Date: Tue, 23 Jul 2024 02:47:44 GMT
< Content-Length: 156
< 
{
 "model_version_status": [
  {
   "version": "123",
   "state": "AVAILABLE",
   "status": {
    "error_code": "OK",
    "error_message": ""
   }
  }
 ]
}
* Connection #0 to host localhost left intact


In [38]:
# Check prediction
!echo {BASE_PATH}/sample_request.json
!curl --header "Content-Type: application/json"   --request POST --data @{BASE_PATH}/sample_request.json http://localhost:8501/v1/models/{ENDPOINT_MODEL_NAME}:predict

/home/brlamore/src/mlflow_server/online_endpoint/sample_request.json


{
    "predictions": [2.5, 3.0, 4.5
    ]
}

In [40]:
!docker stop tfserving-test

tfserving-test


In [41]:

credential = DefaultAzureCredential()
if not credential:
    credential = ClientSecretCredential(
        tenant_id=AZURE_TENANT_ID,
        client_id=AZURE_CLIENT_ID,
        client_secret=AZURE_CLIENT_SECRET,
    )
    
ml_client = MLClient(
    DefaultAzureCredential(), AZURE_SUBSCRIPTION_ID, AZURE_ML_RESOURCE_GROUP, AZURE_ML_WORKSPACE
)

In [42]:
# Creating a unique endpoint name with current datetime to avoid conflicts
import datetime

online_endpoint_name = "endpoint-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a sample online endpoint",
    auth_mode="key",
    tags={"foo": "bar"},
)

In [43]:
# Create the endpoint
ml_client.begin_create_or_update(endpoint).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://endpoint-07230815437793.westus3.inference.ml.azure.com/score', 'openapi_uri': 'https://endpoint-07230815437793.westus3.inference.ml.azure.com/swagger.json', 'name': 'endpoint-07230815437793', 'description': 'this is a sample online endpoint', 'tags': {'foo': 'bar'}, 'properties': {'createdBy': '0319a625-aacd-4030-af25-09981016d2f1', 'createdAt': '2024-07-23T15:16:07.213121+0000', 'lastModifiedAt': '2024-07-23T15:16:07.213121+0000', 'azureml.onlineendpointid': '/subscriptions/f4f99f06-ec30-4601-b84a-6a47929bc9cc/resourcegroups/rg_aml/providers/microsoft.machinelearningservices/workspaces/blx_aml/onlineendpoints/endpoint-07230815437793', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/f4f99f06-ec30-4601-b84a-6a47929bc9cc/providers/Microsoft.MachineLearningServices/locations/westus3/mfeOperationsStatus/oeidp:22f88b38-6636-4d7c-a9d4-fc0cdeb79f22:70491d

In [44]:
# create a blue deployment
model = Model(name="tfserving-mounted", version="1", path="half_plus_two")

env = Environment(
    image="docker.io/tensorflow/serving:latest",
    inference_config={
        "liveness_route": {"port": 8501, "path": "/v1/models/half_plus_two"},
        "readiness_route": {"port": 8501, "path": "/v1/models/half_plus_two"},
        "scoring_route": {"port": 8501, "path": "/v1/models/half_plus_two:predict"},
    },
)

blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    environment=env,
    environment_variables={
        "MODEL_BASE_PATH": "/var/azureml-app/azureml-models/tfserving-mounted/1",
        "MODEL_NAME": "half_plus_two",
    },
    instance_type="Standard_F2s_v2",
    instance_count=1,
)

In [45]:
ml_client.begin_create_or_update(blue_deployment).result()

Check: endpoint endpoint-07230815437793 exists
[32mUploading half_plus_two (0.02 MBs): 100%|██████████| 23647/23647 [00:00<00:00, 145139.76it/s]
[39m



.........................................................................

ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'endpoint-07230815437793', 'type': 'Managed', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/f4f99f06-ec30-4601-b84a-6a47929bc9cc/providers/Microsoft.MachineLearningServices/locations/westus3/mfeOperationsStatus/odidp:22f88b38-6636-4d7c-a9d4-fc0cdeb79f22:59add067-0947-4bc5-8edc-ab68a2114bd5?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/f4f99f06-ec30-4601-b84a-6a47929bc9cc/resourceGroups/rg_aml/providers/Microsoft.MachineLearningServices/workspaces/blx_aml/onlineEndpoints/endpoint-07230815437793/deployments/blue', 'Resource__source_path': '', 'base_path': '/home/brlamore/src/mlflow_server/online_endpoint', 'creation_context': <azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.SystemData object at 0x7fca7f1f5ab0>, 'serial