install required libraries

In [None]:
!pip install tensorflow
!pip install --upgrade tensorflow-hub
!pip install tensorflow-text
!pip install sentencepiece

install required libraries

In [None]:
!pip install tensorflow
!pip install --upgrade tensorflow-hub
!pip install tensorflow-text
!pip install sentencepiece

import required libraries 

In [None]:
import numpy as np
import os
import pandas as pd
import tensorflow_text
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_text import SentencepieceTokenizer
import sentencepiece
import sagemaker
from sagemaker import get_execution_role
import json
import boto3
import time

Create the directory structure in the format SageMaker TensorFlow Serving expects.

In [None]:
!mkdir method-embeddings-model-blog
!mkdir method-embeddings-model-blog/model
!mkdir method-embeddings-model-blog/model/001
!mkdir method-embeddings-model-blog/code

prepare untarred model directory structure 

In [None]:
# Local export tree: <export_path>/<model_name>/<version>, with "001" as the version folder.
export_path = "method-embeddings-model-blog"
model_name = "model"
model_path = "/".join((export_path, model_name, "001"))

URL for downloading the open-source universal-sentence-encoder-multilingual model from TensorFlow Hub. More details here: https://tfhub.dev/google/universal-sentence-encoder-multilingual/3

In [None]:
# The "?tf-hub-format=compressed" query asks for the model as a compressed archive;
# the extraction cell later pipes this download straight into `tar -z`.
tf_model_download_path = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3?tf-hub-format=compressed" 

In [None]:
%%writefile inference.py

import base64
import io
import json
import requests
import boto3
import nltk
# sent_tokenize needs the Punkt sentence models at runtime. Older NLTK releases
# load the pickled 'punkt' resource, while NLTK 3.9+ loads 'punkt_tab' instead.
# requirements.txt does not pin nltk, so fetch both to work with either version.
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize
import logging
# Configure the root logger so the DEBUG output from the handlers below is emitted.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


def input_handler(data, context):
    """Turn the incoming request into a TensorFlow Serving REST payload.

    The request body is read as UTF-8 encoded JSON, the text stored under the
    ``"key"`` field is split into sentences with NLTK, and the resulting list
    of sentences is sent as a single instance.

    Args:
        data (obj): the request stream; ``read()`` is called on it to get the raw bytes
        context (Context): an object containing request and configuration details
            (not used by this handler)
    Returns:
        (str): a JSON string of the form ``{"instances": [[sentence, ...]]}``
    """
    payload = json.loads(data.read().decode('utf-8'))
    sentences = sent_tokenize(payload["key"])
    logger.debug(sentences)
    # The sentence list is wrapped in an outer list: one instance holding all sentences.
    request_body = {"instances": [sentences]}
    return json.dumps(request_body)


def output_handler(data, context):
    """Relay the TensorFlow Serving response back to the client.

    Args:
        data (obj): the TensorFlow Serving response; its ``status_code`` and
            ``content`` attributes are read
        context (Context): request details; ``accept_header`` supplies the
            content type reported to the client
    Returns:
        (bytes, string): the unmodified response body and the response content type
    Raises:
        ValueError: if the status code is not 200; the message is the UTF-8
            decoded response body
    """
    succeeded = data.status_code == 200
    if succeeded:
        content_type = context.accept_header
        body = data.content
        return body, content_type

    # Any non-200 answer from TensorFlow Serving is surfaced as an error.
    raise ValueError(data.content.decode('utf-8'))

In [None]:
%%writefile requirements.txt
boto3
pysqlite3
requests
nltk

move relevant files to the untarred directory structure 

In [None]:
# Copy the handler script and its requirements file into the code/ folder of the export tree.
!cp inference.py $export_path/code/
!cp requirements.txt $export_path/code/

Extract the contents of the model tarball downloaded from TensorFlow Hub into the local directory structure.

In [None]:
%%time
# Stream the download straight into tar without saving the archive:
# curl -L follows redirects; tar -z gunzips, -x extracts, -v lists files, -C unpacks into model_path.
!curl -L {tf_model_download_path} | tar -zxvC {model_path}

Create a compressed archive of the directory "method-embeddings-model-blog/" and save it as "model.tar.gz".

In [None]:
%%time
# Run tar from the current working directory (-C "$PWD") and gzip the whole
# method-embeddings-model-blog/ tree into model.tar.gz.
!tar -C "$PWD" -czf model.tar.gz method-embeddings-model-blog/

upload the "model.tar.gz" to S3

In [None]:
%%time
from sagemaker.session import Session

# Upload the archive under the "model" key prefix. The returned location is kept
# in model_data and passed to the SageMaker model created in a later cell.
model_data = Session().upload_data(path="model.tar.gz", key_prefix="model")
print(f"model uploaded to: {model_data}")

create a SageMaker Model that contains references to a model.tar.gz file in S3 containing serialized model data, and a Docker image used to serve predictions with that model.

In [None]:
from sagemaker.tensorflow.model import TensorFlowModel

# Session, execution role and serving framework version used for the model.
sagemaker_session = sagemaker.Session()
sagemaker_role = get_execution_role()
framework_version = '2.10'

# Model definition: the uploaded archive plus the custom inference handlers
# found in the code/ folder of the export tree.
tensorflow_serving_model = TensorFlowModel(
    model_data=model_data,
    role=sagemaker_role,
    framework_version=framework_version,
    sagemaker_session=sagemaker_session,
    entry_point='inference.py',
    source_dir=f"{export_path}/code",
)

Call deploy on the TensorFlowModel object to create a SageMaker endpoint.
The SageMaker SDK v2 warning can be safely ignored: deploy() uses a default value of update_endpoint=None, which triggers the warning.

In [None]:
# Deploy the model with a single ml.c5.xlarge instance; the returned predictor
# exposes the endpoint name used in the following cells.
predictor = tensorflow_serving_model.deploy(initial_instance_count=1, instance_type="ml.c5.xlarge")

In [None]:
# Keep the endpoint name for the predictor and SSM cells, and show it.
endpointName = predictor.endpoint_name
print(endpointName)

create a Predictor implementation for inference against TensorFlow Serving endpoints.

In [None]:
from sagemaker.predictor import Predictor

# Client-side handle for the deployed endpoint: requests are serialized to JSON
# and responses are parsed from JSON.
json_serializer = sagemaker.serializers.JSONSerializer()
json_deserializer = sagemaker.deserializers.JSONDeserializer()
multilingual_predictor = Predictor(
    endpoint_name=endpointName,
    sagemaker_session=sagemaker.Session(),
    serializer=json_serializer,
    deserializer=json_deserializer,
)

Store predictor.endpoint_name in an SSM parameter. You can reference this parameter in the CDK code.

In [None]:
ssm_client = boto3.client('ssm')

# Write the endpoint name to the 'sagemaker-endpoint' parameter, replacing any
# value that is already stored there.
ssm_client.put_parameter(
    Name='sagemaker-endpoint',
    Value=endpointName,
    Type='String',
    Overwrite=True,
)

# Read the parameter back and print it to confirm what was stored.
response = ssm_client.get_parameter(Name='sagemaker-endpoint')
print(response['Parameter']['Value'])