#### Imports 

In [1]:
from time import gmtime, strftime
from sagemaker import image_uris
import sagemaker
import logging
import boto3
import json

#### Setup

In [2]:
# Notebook-wide logger. Pass the identifier __name__ (not the string literal
# '__name__') so records are attributed to the running module.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# Notebook cells are often re-run; attach the stream handler only once so each
# message is not emitted multiple times after a re-execution of this cell.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())

In [3]:
# Log the installed SageMaker Python SDK version so the run can be reproduced
# against the same SDK release.
logger.info(f'Using SageMaker version: {sagemaker.__version__}')

Using SageMaker version: 2.49.0


In [4]:
# Discover the AWS region and the IAM execution role from the SageMaker
# environment this notebook is running in.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()

# Pin both the default boto3 session and an explicit session to that region,
# then build the low-level SageMaker client used by the cells below.
boto3.setup_default_session(region_name=region)
boto_session = boto3.Session(region_name=region)
sagemaker_client = boto_session.client(service_name='sagemaker')

# Month-day-hour-minute stamp in UTC; later cells append it to resource names.
utc_now = gmtime()
current_timestamp = strftime('%m-%d-%H-%M', utc_now)

logger.info(f'Region = {region}')

Region = us-east-1


## How do I create an endpoint using model artifacts in S3?

#### Retrieve the inference image and model data S3 location

In [5]:
# S3 URI of the packaged model artifact (model.tar.gz); passed to create_model
# as the container's ModelDataUrl.
# NOTE(review): the bucket name appears to embed a specific account ID and
# region — replace it with your own artifact location before running.
MODEL_DATA_URL = 's3://sagemaker-us-east-1-892313895307/pipeline/model/model.tar.gz'

In [6]:
# Look up the ECR URI of the Hugging Face (TensorFlow 2.4.1 / Transformers
# 4.6.1) inference container.
# The region comes from the `region` variable resolved in the setup cell rather
# than a hard-coded 'us-east-1', so the image URI follows the region the
# SageMaker client is bound to when the notebook runs elsewhere.
# NOTE(review): instance_type here (ml.m5.4xlarge) differs from the
# ml.m5.xlarge used in the endpoint configuration; it presumably only selects
# the CPU vs. GPU image variant — confirm.
ecr_image = image_uris.retrieve(
    framework='huggingface',
    region=region,
    version='4.6.1',
    image_scope='inference',
    base_framework_version='tensorflow2.4.1',
    py_version='py37',
    container_version='ubuntu18.04',
    instance_type='ml.m5.4xlarge',
)
ecr_image

'763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-tensorflow-inference:2.4.1-transformers4.6.1-cpu-py37-ubuntu18.04'

#### Re-create the Model

In [7]:
# Model name: fixed 'bert-classifier-' prefix plus the timestamp computed in
# the setup cell.
model_name = f'bert-classifier-{current_timestamp}'

In [8]:
# Container definition: the inference image plus the S3 model artifact.
primary_container = {
    'Image': ecr_image,
    'ModelDataUrl': MODEL_DATA_URL,
}

# Register the model with SageMaker under the execution role and keep its ARN.
response = sagemaker_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer=primary_container,
)
model_arn = response['ModelArn']

logger.info(f'Created Model: {model_arn}')

Created Model: arn:aws:sagemaker:us-east-1:892313895307:model/bert-classifier-08-09-20-04


### Create Endpoint Configuration

Once you have a model, create an endpoint configuration with the `CreateEndpointConfig` API. Amazon SageMaker hosting services use this configuration to deploy models.

In [9]:
# Reuse the model name as the endpoint configuration name.
endpoint_config_name = model_name

In [10]:
# Single production variant: one ml.m5.xlarge instance serving the model
# created above.
production_variant = {
    "VariantName": "variant-1",
    "ModelName": model_name,
    "InstanceType": "ml.m5.xlarge",
    "InitialInstanceCount": 1,
}

# Create the endpoint configuration and keep its ARN.
response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)
endpoint_config_arn = response['EndpointConfigArn']

logger.info(f"Created EndpointConfig: {endpoint_config_arn}")

Created EndpointConfig: arn:aws:sagemaker:us-east-1:892313895307:endpoint-config/bert-classifier-08-09-20-04


### Create Endpoint
Once you have your model and endpoint configuration, use the CreateEndpoint API to create your endpoint. The endpoint name must be unique within an AWS Region in your AWS account.

In [11]:
# The endpoint shares its name with the model and the endpoint configuration.
endpoint_name = model_name

# Request creation of the endpoint from the configuration created above and
# keep the ARN returned by the call.
create_endpoint_args = {
    'EndpointName': endpoint_name,
    'EndpointConfigName': endpoint_config_name,
}
response = sagemaker_client.create_endpoint(**create_endpoint_args)
endpoint_arn = response['EndpointArn']

logger.info(f'Created Endpoint: {endpoint_arn}')

Created Endpoint: arn:aws:sagemaker:us-east-1:892313895307:endpoint/bert-classifier-08-09-20-04


**Note:** Wait until the endpoint status is `InService` before invoking it.

In [12]:
# Both calls below address the same endpoint, so build the argument once.
endpoint_selector = {'EndpointName': endpoint_name}

# Block on the 'endpoint_in_service' waiter, then read back the endpoint
# description and log its status.
waiter = sagemaker_client.get_waiter('endpoint_in_service')
logger.info("Waiting for endpoint to create...")
waiter.wait(**endpoint_selector)

response = sagemaker_client.describe_endpoint(**endpoint_selector)
logger.info(f"Endpoint Status: {response['EndpointStatus']}")

Waiting for endpoint to create...
Endpoint Status: InService
