# Deploy Text Embedding Inference (TEI) as Azure ML Online Endpoint

In [1]:
# import required libraries
import time
import json

from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
   ManagedOnlineEndpoint,
   ManagedOnlineDeployment,
   Environment,
)
from azure.identity import DefaultAzureCredential

In [2]:
# enter details of your Azure Machine Learning workspace
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace = "<AZUREML_WORKSPACE_NAME>"


# get a handle to the workspace
ml_client = MLClient(
   DefaultAzureCredential(), subscription_id, resource_group, workspace
)

In [3]:
# Creating a unique endpoint name with current datetime to avoid conflicts
import datetime

online_endpoint_name = "tei-endpoint-" + datetime.datetime.now().strftime("%m%d%H%M%f")

# create an online endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a test TEI online endpoint",
    auth_mode="key",
    tags={"name": "arr-TEI"},
)

In [4]:
# create a tei deployment

TEI_IMAGE = "ghcr.io/huggingface/text-embeddings-inference:turing-0.3.0"
DEPLOYMENT_NAME = "tei-test"

env = Environment(
    image=TEI_IMAGE,
    inference_config={
        "liveness_route": {"port": 80, "path": "/health"},
        "readiness_route": {"port": 80, "path": "/health"},
        "scoring_route": {"port": 80, "path": "/embed"},
    }
)

deployment = ManagedOnlineDeployment(
    name=DEPLOYMENT_NAME,
    endpoint_name=online_endpoint_name,
    environment=env,
    environment_variables={
        "MODEL_ID": "BAAI/bge-large-en-v1.5",
        "REVISION": "refs/pr/5",
    },
    instance_type="Standard_NC4as_T4_v3",
    instance_count=1, # configure replicas here
)# 

In [5]:
# create endpoint

ml_client.begin_create_or_update(endpoint)
time.sleep(120) # wait for creation, may take longer

In [8]:
# create deployment

ml_client.begin_create_or_update(deployment)
time.sleep(120) # wait for creation, may take longer

Check: endpoint tei-endpoint-11101935899993 exists


<azure.core.polling._poller.LROPoller at 0x7f3d94a47700>

.

In [9]:
# optionally, you can configure an Autoscaling policy
# see here: https://learn.microsoft.com/en-us/azure/machine-learning/how-to-autoscale-endpoints?view=azureml-api-2&tabs=python

.

In [10]:
# create some data to test with

test_data = {"inputs": "What is Deep Learning?"}
file_name = "test_data.json"

# Save the dictionary as a JSON file
with open(file_name, 'w') as file:
    json.dump(test_data, file)



....

In [18]:
# test the deployment with some sample data
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name=DEPLOYMENT_NAME,
    request_file=file_name,
)

'[[0.018096924,0.0030174255,-0.04989624,-0.03503418,0.014228821,-0.023605347,-0.0158844,-0.02168274,-0.005054474,0.06317139,0.014671326,0.028381348,0.02394104,-0.034088135,-0.019439697,-0.0037326813,-0.016921997,-0.0066947937,-0.046173096,-0.003774643,-0.027175903,0.05065918,-0.027786255,-0.004852295,-0.036987305,0.011482239,0.07141113,0.029159546,0.04776001,0.035003662,-0.019058228,-0.025924683,0.007167816,-0.03250122,-0.020706177,-0.012527466,0.010032654,-0.024887085,-0.06365967,-0.05154419,-0.015235901,0.03253174,0.023162842,-0.094055176,-0.051116943,0.011238098,0.038513184,0.0021133423,-0.01637268,-0.0033016205,-0.0082092285,0.030059814,-0.009017944,-0.014434814,0.016784668,-0.02142334,0.014930725,0.02897644,0.018096924,-0.0042686462,0.046081543,-0.021072388,0.0178833,-0.040740967,0.029724121,0.0027313232,0.010643005,-0.041656494,-0.020706177,-0.029083252,-0.05014038,-0.0056381226,0.009475708,-0.03765869,-0.017593384,0.024368286,-0.0063819885,0.043518066,-0.043762207,0.007484436,0.

In [19]:
# delete the endpoint
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)

<azure.core.polling._poller.LROPoller at 0x7f3d7fc94ca0>

...............................................................

**Additional Resources:**
- [Use a custom container to deploy a model to an online endpoint](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-custom-container?view=azureml-api-2&tabs=cli)
- [Deploy and score a machine learning model by using an online endpoint](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-custom-container?view=azureml-api-2&tabs=cli)
- [Autoscale an online endpoint](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-custom-container?view=azureml-api-2&tabs=cli)
