## Deploy Text Embedding Model (GPT-J 6B FP-16)

#### Imports

In [3]:
from sagemaker.jumpstart.notebook_utils import list_jumpstart_models
from sagemaker.predictor import Predictor
from sagemaker import get_execution_role
from sagemaker.model import Model
from sagemaker import script_uris
from sagemaker import image_uris 
from sagemaker import model_uris
import sagemaker
import logging
import boto3
import time
import json

##### Setup logging

In [4]:
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

##### Log versions of dependencies

In [5]:
logger.info(f'Using sagemaker=={sagemaker.__version__}')
logger.info(f'Using boto3=={boto3.__version__}')

Using sagemaker==2.145.0
Using boto3==1.26.108


#### Setup essentials 

##### List and filter all text embedding models available in JumpStart

In [6]:
models = list_jumpstart_models()
logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')

FILTER = 'task == textembedding'
txt2img_models = list_jumpstart_models(filter=FILTER)
txt2img_models

Total number of models in SageMaker JumpStart hub = 649


['huggingface-textembedding-bloom-7b1',
 'huggingface-textembedding-bloom-7b1-fp16',
 'huggingface-textembedding-gpt-j-6b',
 'huggingface-textembedding-gpt-j-6b-fp16']

##### Setup config params

In [21]:
MODEL_ID = 'huggingface-textembedding-bloom-7b1-fp16'  
MODEL_VERSION = '*'
INSTANCE_TYPE = 'ml.g5.2xlarge'
INSTANCE_COUNT = 1
IMAGE_SCOPE = 'inference'
MODEL_DATA_DOWNLOAD_TIMEOUT = 3600  # in seconds
CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600
CONTENT_TYPE = 'application/json'

# Set up roles and clients 
client = boto3.client('sagemaker-runtime')
ROLE = get_execution_role()
logger.info(f'Role => {ROLE}')

Role => arn:aws:iam::106839800180:role/service-role/AmazonSageMaker-ExecutionRole-20221107T112230


In [23]:
from sagemaker.utils import name_from_base
endpoint_name = name_from_base(MODEL_ID) # Append a timestamp to the provided string

logger.info(f'Endpoint name: {endpoint_name}')

Endpoint name: huggingface-textembedding-bloom-7b1-fp1-2023-04-13-11-29-28-700


#### Retrieve image and model URIs

In [24]:
deploy_image_uri = image_uris.retrieve(region=None, 
                                       framework=None, 
                                       image_scope=IMAGE_SCOPE, 
                                       model_id=MODEL_ID, 
                                       model_version=MODEL_VERSION, 
                                       instance_type=INSTANCE_TYPE)
logger.info(f'Deploy image URI => {deploy_image_uri}')

Deploy image URI => 763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:1.12.0-gpu-py38


In [25]:
model_uri = model_uris.retrieve(model_id=MODEL_ID, 
                                model_version=MODEL_VERSION, 
                                model_scope=IMAGE_SCOPE)
logger.info(f'Model URI => {model_uri}')

Model URI => s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.0.0/infer-prepack-huggingface-textembedding-bloom-7b1-fp16.tar.gz


In [26]:
env = {
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),
    'MODEL_CACHE_ROOT': '/opt/ml/model', 
    'SAGEMAKER_ENV': '1',
    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1', 
    'TS_DEFAULT_WORKERS_PER_MODEL': '1', 
}

#### Create SageMaker Model

In [27]:
model = Model(image_uri=deploy_image_uri, 
              model_data=model_uri, 
              role=ROLE, 
              predictor_cls=Predictor, 
              name=endpoint_name, 
              env=env)

#### Deploy text embedding model as SageMaker endpoint for real-time synchronous inference

In [28]:
%%time

_ = model.deploy(initial_instance_count=INSTANCE_COUNT, 
                 instance_type=INSTANCE_TYPE, 
                 endpoint_name=endpoint_name, 
                 model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, 
                 container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)

Creating model with name: huggingface-textembedding-bloom-7b1-fp1-2023-04-13-11-29-28-700
CreateModel request: {
    "ModelName": "huggingface-textembedding-bloom-7b1-fp1-2023-04-13-11-29-28-700",
    "ExecutionRoleArn": "arn:aws:iam::106839800180:role/service-role/AmazonSageMaker-ExecutionRole-20221107T112230",
    "PrimaryContainer": {
        "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:1.12.0-gpu-py38",
        "Environment": {
            "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600",
            "MODEL_CACHE_ROOT": "/opt/ml/model",
            "SAGEMAKER_ENV": "1",
            "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code/",
            "SAGEMAKER_PROGRAM": "inference.py",
            "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
            "TS_DEFAULT_WORKERS_PER_MODEL": "1"
        },
        "ModelDataUrl": "s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.0.0/infer-prepack-huggingface-textembedding-bloom-7b1-fp16.tar.gz"
    },
    "Tags":

----------!CPU times: user 207 ms, sys: 14.9 ms, total: 221 ms
Wall time: 5min 33s


In [29]:
prompt = '请问AWS Clean Rooms的一个协作中可以有多少个参与方?'
TEXT_EMBEDDING_MODEL_ENDPOINT_NAME = 'huggingface-textembedding-bloom-7b1-fp1-2023-04-13-11-29-28-700'

In [30]:
payload = {'text_inputs': [prompt]}
payload = json.dumps(payload).encode('utf-8')

sagemaker_client = boto3.client('runtime.sagemaker')
response = sagemaker_client.invoke_endpoint(EndpointName=TEXT_EMBEDDING_MODEL_ENDPOINT_NAME, 
                                            ContentType='application/json', 
                                            Body=payload)
body = json.loads(response['Body'].read())
embedding = body['embedding'][0]

In [31]:
print(embedding)

[0.0022760427091270685, 0.0005220313323661685, -0.0033640293404459953, 0.00265699764713645, 0.0004603319976013154, 0.0006847877521067858, 0.0061433641240000725, 0.0036757716443389654, 0.005751546937972307, 0.0010775242699310184, -0.007867978885769844, 0.00251408782787621, -0.0005305070080794394, 0.0007272417424246669, -0.00016569695435464382, 0.0005820182268507779, -0.002726316452026367, -0.0018575744470581412, -7.240525155793875e-05, 0.0029908176511526108, -0.00024354543711524457, -0.0023575949016958475, -0.0003261571400798857, 0.005014562513679266, -0.0012012255610898137, 0.001143365167081356, 0.0044100405648350716, 0.003216979093849659, -0.027189910411834717, -0.007975797168910503, -0.0009643006487749517, -0.0022356926929205656, -0.0006579704931937158, -0.000445015262812376, -0.0009157515596598387, -0.00015856399841140956, -0.0008840392110869288, 0.002918636193498969, -0.003202068852260709, -0.0017294753342866898, 0.0011854851618409157, -0.0006143208593130112, -0.001969535369426012,

In [20]:
!aws sagemaker delete-endpoint --endpoint-name huggingface-textembedding-gpt-j-6b-fp16-1681377910

In [None]:
!aws sagemaker delete-endpoint-config --endpoint-config-name bloomz-7b1-mt-2023-04-13-09-54-18-256-config