## Deploy foundational models using SageMaker Jumpstart

#### Imports 

In [2]:
from sagemaker.utils import name_from_base
from sagemaker.predictor import Predictor
from sagemaker.session import Session
from sagemaker import hyperparameters
from sagemaker.model import Model
from sagemaker import script_uris
from sagemaker import image_uris
from sagemaker import model_uris
import sagemaker
import logging
import json

##### Setup logger

In [3]:
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

##### Log versions of dependencies

In [4]:
logger.info(f'[Using sagemaker: {sagemaker.__version__}]')

[Using sagemaker: 2.100.0]


#### Setup essentials

In [5]:
MODEL_ID = 'huggingface-textgeneration-gpt2'  # do not change this 
MODEL_VERSION = '*'

In [6]:
ENDPOINT_NAME = 'gpt2'
INSTANCE_TYPE = 'ml.p2.xlarge'

In [7]:
sagemaker_session = Session()
ROLE = sagemaker_session.get_caller_identity_arn()
ROLE

'arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628'

Retrieve HuggingFace DLC image ECR URI 

In [8]:
image_uri = image_uris.retrieve(framework=None,  # automatically inferred via MODE_ID
                                region=None,     # automatically inferred via MODE_ID
                                image_scope='inference', 
                                model_id=MODEL_ID, 
                                model_version=MODEL_VERSION, 
                                instance_type=INSTANCE_TYPE)
image_uri

'763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04'

Retrieve inference script S3 URI containing all dependencies and scripts for model loading and inference handling 

In [9]:
source_uri = script_uris.retrieve(model_id=MODEL_ID, 
                                  model_version=MODEL_VERSION, 
                                  script_scope='inference')
source_uri

's3://jumpstart-cache-prod-us-east-1/source-directory-tarballs/huggingface/inference/textgeneration/v1.1.2/sourcedir.tar.gz'

Retrieve S3 URI for the pre-trained factory default model

In [10]:
model_uri = model_uris.retrieve(model_id=MODEL_ID, 
                                model_version=MODEL_VERSION, 
                                model_scope='inference')
model_uri

's3://jumpstart-cache-prod-us-east-1/huggingface-infer/infer-huggingface-textgeneration-gpt2.tar.gz'

#### Create model

In [11]:
model = Model(image_uri=image_uri, 
              source_dir=source_uri, 
              model_data=model_uri, 
              entry_point='inference.py', 
              role=ROLE, 
              predictor_cls=Predictor, 
              name=ENDPOINT_NAME)

#### Deploy model 

In [12]:
model_predictor = model.deploy(initial_instance_count=2, 
                               instance_type=INSTANCE_TYPE, 
                               predictor_cls=Predictor, 
                               endpoint_name=ENDPOINT_NAME)

Creating model with name: gpt2
CreateModel request: {
    "ModelName": "gpt2",
    "ExecutionRoleArn": "arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628",
    "PrimaryContainer": {
        "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04",
        "Environment": {
            "SAGEMAKER_PROGRAM": "inference.py",
            "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
            "SAGEMAKER_REGION": "us-east-1"
        },
        "ModelDataUrl": "s3://sagemaker-us-east-1-119174016168/gpt2/model.tar.gz"
    },
    "Tags": [
        {
            "Key": "aws-jumpstart-inference-model-uri",
            "Value": "s3://jumpstart-cache-prod-us-east-1/huggingface-infer/infer-huggingface-textgeneration-gpt2.tar.gz"
        },
        {
            "Key": "aws-jumpstart-inference-script-uri",
            "Valu

--------------!

#### Invoke deployed model (SageMaker endpoint) for real-time inference 

In [13]:
data = 'China is effectively in a lockdown.'

In [14]:
encoded_text = json.dumps(data).encode('utf-8')
encoded_text

b'"China is effectively in a lockdown."'

In [15]:
CONTENT_TYPE = 'application/x-text'
ACCEPT_TYPE = 'application/json'

In [16]:
HEADERS = {'ContentType': CONTENT_TYPE, 
           'Accept': ACCEPT_TYPE}

In [17]:
response = model_predictor.predict(encoded_text, HEADERS)
response

b'{"generated_text": "\\"China is effectively in a lockdown.\\" By contrast, the US has been allowed to carry out several other military exercises over the last year, including one over Guam and \\"Dana for Life\\", an air raid against \\"Sleeping Dragon\\" in"}'

In [18]:
response = json.loads(response)
generated_text = response['generated_text']
generated_text

'"China is effectively in a lockdown." By contrast, the US has been allowed to carry out several other military exercises over the last year, including one over Guam and "Dana for Life", an air raid against "Sleeping Dragon" in'