#### Prerequisites

In [2]:
%%capture 

!pip install sagemaker==2.100.0

#### Imports 

In [3]:
from sagemaker.huggingface import HuggingFaceModel
from sagemaker import get_execution_role
from sagemaker import Session
import sagemaker
import logging
import tarfile
import os

##### Setup logging

In [4]:
# Route the SageMaker SDK's log records (DEBUG and above) to the notebook
# output through a stream handler attached to the 'sagemaker' logger.
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same logger object for the life of the kernel,
# so re-running this cell used to stack one more StreamHandler each time and
# every message was then printed once per handler. The handler is tagged with
# a name and only attached when no handler with that name is present, which
# makes the cell safe to re-run.
if not any(h.get_name() == 'notebook-stream' for h in logger.handlers):
    stream_handler = logging.StreamHandler()
    stream_handler.set_name('notebook-stream')
    logger.addHandler(stream_handler)

##### Log versions of dependencies 

In [5]:
# Record the SDK version in the notebook output so a run can be matched to
# the version pinned in the install cell.
version_banner = f'[Using SageMaker: {sagemaker.__version__}]'
logger.info(version_banner)

[Using SageMaker: 2.100.0]


#### Essentials 

In [6]:
# --- Static configuration -------------------------------------------------
# Endpoint sizing, consumed by the deploy cell.
INSTANCE_TYPE = 'ml.c5.2xlarge'
INSTANCE_COUNT = 2

# Framework versions handed to HuggingFaceModel further down.
TRANSFORMERS_VERSION = '4.17.0'
PYTORCH_VERSION = '1.10.2'
PYTHON_VERSION = 'py38'

# --- Values resolved from the AWS environment -----------------------------
session = Session()
ROLE = get_execution_role()
# Default bucket of this session; the model artifact is read from it.
S3_BUCKET = session.default_bucket()

In [7]:
# Show which bucket the model artifact will be read from.
bucket_message = f'S3 bucket = {S3_BUCKET}'
logger.info(bucket_message)

S3 bucket = sagemaker-us-east-1-119174016168


#### Deploy custom GPT2 pipeline as a SageMaker endpoint for real-time inference 
**Note:** You can deploy either the saved GPT2 model or the pipeline `tar.gz` that we created in the previous module (04-evaluation).

In [8]:
# Key, inside S3_BUCKET, of the artifact to deploy. Exactly one assignment
# should be active.
# Alternative: the plain saved GPT2 model instead of the pipeline archive:
# MODEL_TAR_PATH = 'model/model.tar.gz'
MODEL_TAR_PATH = 'model/pipelines/pipeline.tar.gz'

In [9]:
# Full S3 URI of the artifact selected by MODEL_TAR_PATH.
model_data_uri = f's3://{S3_BUCKET}/{MODEL_TAR_PATH}'

# Describe the model to SageMaker: where the artifact lives, which role the
# service assumes, and the transformers / PyTorch / Python versions to use.
huggingface_model = HuggingFaceModel(
    model_data=model_data_uri,
    role=ROLE,
    transformers_version=TRANSFORMERS_VERSION,
    pytorch_version=PYTORCH_VERSION,
    py_version=PYTHON_VERSION,
)

In [10]:
# Deploy the model as a real-time endpoint, sized by the constants defined in
# the "Essentials" cell. The returned predictor is used for inference and
# clean-up in the cells that follow.
deployment_options = {
    'initial_instance_count': INSTANCE_COUNT,
    'instance_type': INSTANCE_TYPE,
}
predictor = huggingface_model.deploy(**deployment_options)

Creating model with name: huggingface-pytorch-inference-2023-01-26-19-40-53-621
CreateModel request: {
    "ModelName": "huggingface-pytorch-inference-2023-01-26-19-40-53-621",
    "ExecutionRoleArn": "arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628",
    "PrimaryContainer": {
        "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-cpu-py38-ubuntu20.04",
        "Environment": {
            "SAGEMAKER_PROGRAM": "",
            "SAGEMAKER_SUBMIT_DIRECTORY": "",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
            "SAGEMAKER_REGION": "us-east-1"
        },
        "ModelDataUrl": "s3://sagemaker-us-east-1-119174016168/model/pipelines/pipeline.tar.gz"
    }
}
Creating endpoint-config with name huggingface-pytorch-inference-2023-01-26-19-40-54-230
Creating endpoint with name huggingface-pytorch-inference-2023-01-26-19-40-54-230


-----!

#### Invoke endpoint for inference

In [13]:
# Send one prompt to the endpoint; the bare `response` on the last line makes
# the notebook display the returned value.
prompt = 'China is effectively in a lockdown.'
data = {'inputs': prompt}
response = predictor.predict(data)
response

[{'generated_text': 'China is effectively in a lockdown. this is far from the worst possible situation. this month, for example, there were more than 25,000 covid-19 deaths and hospitals were overflowing with patients. “ everyone is panicking. at the beginning'}]

#### Delete endpoint (optional)

In [None]:
# Tear down the endpoint once you are done with it. The endpoint
# configuration is removed along with it; the flag is the SDK default and is
# spelled out here only for clarity.
predictor.delete_endpoint(delete_endpoint_config=True)