#### Prerequisites

In [1]:
%%capture 

!pip install sagemaker==2.100.0
!pip install jedi==0.17  # this is a requirement for pygmentize to work

#### Imports 

In [2]:
from sagemaker.huggingface import HuggingFaceModel
from sagemaker import get_execution_role
from sagemaker import Session
import sagemaker
import logging

##### Setup logging

In [3]:
# Send the records of the 'sagemaker' logger to the notebook output.
logger = logging.getLogger('sagemaker')
# DEBUG is the most verbose standard level, so nothing the logger emits is filtered.
logger.setLevel(logging.DEBUG)
# Attach the console handler only once: logging.getLogger returns the same
# logger object on every call, so re-running this cell would otherwise stack
# another StreamHandler and print every message multiple times.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
    logger.addHandler(logging.StreamHandler())

##### Log versions of dependencies 

In [4]:
# Record the installed SageMaker SDK version alongside the notebook output.
version_banner = f'[Using SageMaker: {sagemaker.__version__}]'
logger.info(version_banner)

[Using SageMaker: 2.100.0]


#### Essentials 

In [5]:
# --- AWS context: SDK session, execution role and the session's default bucket ---
session = Session()
ROLE = get_execution_role()
S3_BUCKET = session.default_bucket()

# --- Framework versions handed to HuggingFaceModel ---
TRANSFORMERS_VERSION = '4.17.0'
PYTORCH_VERSION = '1.10.2'
PYTHON_VERSION = 'py38'

# --- Endpoint sizing used at deploy time ---
INSTANCE_COUNT = 2
INSTANCE_TYPE = 'ml.p3.2xlarge'

In [6]:
# Show which bucket the model artifact will be read from.
bucket_message = f'S3 bucket = {S3_BUCKET}'
logger.info(bucket_message)

S3 bucket = sagemaker-us-east-1-119174016168


#### Deploy CovidBERT multi-class classifier as a SageMaker endpoint for real-time inference 
**Note:** You can deploy either the saved model tarball or the pipeline tarball here.

In [7]:
# Object key, inside S3_BUCKET, of the artifact to deploy.
# Keep exactly one of the two assignments below active:
#   - the saved model tarball (commented out), or
#   - the pipeline tarball (currently selected).
# MODEL_TAR_PATH = 'model/finetuned-clf/model-tar/model.tar.gz'
MODEL_TAR_PATH = 'model/finetuned-clf-custom/pipeline-tar/pipeline.tar.gz'

In [8]:
# Full S3 URI of the tarball selected via MODEL_TAR_PATH.
model_data_uri = f's3://{S3_BUCKET}/{MODEL_TAR_PATH}'

# Describe the model to SageMaker; nothing is deployed until .deploy() is called.
huggingface_model = HuggingFaceModel(
    model_data=model_data_uri,
    role=ROLE,
    transformers_version=TRANSFORMERS_VERSION,
    pytorch_version=PYTORCH_VERSION,
    py_version=PYTHON_VERSION,
)

In [9]:
# Create the real-time endpoint and keep the returned predictor for the
# inference and clean-up cells.
predictor = huggingface_model.deploy(
    instance_type=INSTANCE_TYPE,
    initial_instance_count=INSTANCE_COUNT,
)

Creating model with name: huggingface-pytorch-inference-2022-09-23-20-11-48-796
CreateModel request: {
    "ModelName": "huggingface-pytorch-inference-2022-09-23-20-11-48-796",
    "ExecutionRoleArn": "arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628",
    "PrimaryContainer": {
        "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04",
        "Environment": {
            "SAGEMAKER_PROGRAM": "",
            "SAGEMAKER_SUBMIT_DIRECTORY": "",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
            "SAGEMAKER_REGION": "us-east-1"
        },
        "ModelDataUrl": "s3://sagemaker-us-east-1-119174016168/model/finetuned-clf-custom/pipeline-tar/pipeline.tar.gz"
    }
}
Creating endpoint-config with name huggingface-pytorch-inference-2022-09-23-20-11-49-573
Creating endpoint with name huggingface-pytorch-inference-2022-09-23-20-11-49-573


-----------!

#### Invoke endpoint for inference

In [10]:
# Request payload: the text to classify goes under the 'inputs' key.
data = {
    'inputs': 'Is the COVID outbreak impact Apple iPhone sales?',
}
# Send the payload to the deployed endpoint and display the prediction.
response = predictor.predict(data=data)
response

[{'label': 'business', 'score': 0.38689836859703064}]

#### Delete endpoint (optional)

In [11]:
# Tear down the endpoint together with its endpoint configuration.
predictor.delete_endpoint(delete_endpoint_config=True)

Deleting endpoint configuration with name: huggingface-pytorch-inference-2022-09-23-20-11-49-573
Deleting endpoint with name: huggingface-pytorch-inference-2022-09-23-20-11-49-573
