#### Prerequisites

In [None]:
%%capture 

!pip install transformers==4.26.0
!pip install sagemaker==2.100.0

#### Imports

In [2]:
from sagemaker.huggingface import HuggingFaceModel
from sagemaker import get_execution_role
from transformers import pipeline
from sagemaker import Session
import transformers
import sagemaker
import logging
import tarfile
import os

#### Set up logging

In [3]:
# Send records from the 'sagemaker' logger to the notebook output at DEBUG level.
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)
# logging.getLogger returns the same logger object on every call, so without
# this guard each re-run of the cell would attach one more StreamHandler and
# every record would be printed once per attached handler.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
    logger.addHandler(logging.StreamHandler())

#### Log versions of dependencies 

In [4]:
# Emit one log line per dependency with the version that is actually imported.
for label, module in (('transformers', transformers), ('sageMaker', sagemaker)):
    logger.info(f'[Using {label}: {module.__version__}]')

[Using transformers: 4.26.0]
[Using sageMaker: 2.100.0]


#### Set up essentials

In [5]:
# Endpoint sizing, consumed by the deploy() call further down.
INSTANCE_TYPE = 'ml.c5.2xlarge'
INSTANCE_COUNT = 2

# Framework versions passed to HuggingFaceModel further down.
# NOTE(review): the notebook installs transformers 4.26.0 locally while 4.17.0
# is requested here; confirm the saved artifacts load under the older version.
TRANSFORMERS_VERSION = '4.17.0'
PYTORCH_VERSION = '1.10.2'
PYTHON_VERSION = 'py38'

# AWS context: SageMaker session, execution role and the session's default bucket.
session = Session()
ROLE = get_execution_role()
S3_BUCKET = session.default_bucket()

#### Load PPO-optimized model from previous module as a HF Pipeline

In [6]:
# Build a text-generation pipeline from the local checkpoint under ../04-ppo.
gpt2_ppo_pipeline = pipeline(
    task='text-generation',
    model='../04-ppo/model/gpt2-ppo-bertscore',
    clean_up_tokenization_spaces=True,
)

Some weights of the model checkpoint at ../04-ppo/model/gpt2-ppo-bertscore were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


#### Save pipeline to local dir 

In [7]:
# Write the pipeline's files to ./pipeline; that directory is archived in the next step.
gpt2_ppo_pipeline.save_pretrained('./pipeline')

#### Archive saved pipeline artifacts 

In [8]:
def get_file_paths(directory: str) -> list:
    """Collect the path of every file found under ``directory``.

    The tree is traversed with ``os.walk``, so files inside nested
    sub-directories are included. Each entry is the directory being
    walked joined with the file name.

    Args:
        directory: Root of the tree to scan.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    ]

In [9]:
def tar_artifacts(local_artifacts_path: str, tar_save_path: str, tar_name: str) -> None:
    """Pack every file under ``local_artifacts_path`` into a gzipped tarball.

    Files are stored at the root of the archive under their base name only;
    the directory structure below ``local_artifacts_path`` is not preserved,
    so two files with the same name in different sub-directories end up as
    entries with the same archive name.

    Args:
        local_artifacts_path: Directory whose files are archived.
        tar_save_path: Directory the archive is written to; created if missing.
        tar_name: File name of the archive inside ``tar_save_path``.

    Returns:
        None. The archive is written to ``tar_save_path/tar_name``.
    """
    os.makedirs(tar_save_path, exist_ok=True)
    archive_path = os.path.join(tar_save_path, tar_name)
    # The context manager closes the archive even if walking the tree or
    # adding a file raises something other than OSError.
    with tarfile.open(archive_path, 'w:gz') as tar:
        for file_path in get_file_paths(local_artifacts_path):
            try:
                tar.add(file_path, arcname=os.path.basename(file_path))
            except OSError as err:
                # Best effort: an unreadable file is skipped, but say which one.
                logger.warning(f'Skipping {file_path} during tar creation: {err}')

In [10]:
# Archive the saved pipeline files into ./gpt2-ppo-pipeline.tar.gz.
tar_artifacts(
    local_artifacts_path='./pipeline/',
    tar_save_path='.',
    tar_name='gpt2-ppo-pipeline.tar.gz',
)

#### Copy pipeline archive from local to S3

In [11]:
!aws s3 cp gpt2-ppo-pipeline.tar.gz s3://sagemaker-us-east-1-119174016168/model/ppo-pipeline/gpt2-ppo-pipeline.tar.gz

upload: ./gpt2-ppo-pipeline.tar.gz to s3://sagemaker-us-east-1-119174016168/model/ppo-pipeline/gpt2-ppo-pipeline.tar.gz


#### Deploy GPT2 PPO pipeline as a SageMaker endpoint for real-time inference
Note: You can either deploy the saved GPT2 PPO model we created in the previous module (04-ppo) or deploy the pipeline we created above.

In [12]:
# Alternative (presumably the model archive saved by module 04-ppo; see the note above):
# MODEL_TAR_PATH = 'model/model.tar.gz'
# Default: key of the pipeline archive uploaded to S3 above.
MODEL_TAR_PATH = 'model/ppo-pipeline/gpt2-ppo-pipeline.tar.gz'

In [13]:
# Describe the model to SageMaker: archive location in S3, execution role and
# the framework versions configured in the setup cell.
huggingface_model = HuggingFaceModel(
    model_data=f's3://{S3_BUCKET}/{MODEL_TAR_PATH}',
    role=ROLE,
    transformers_version=TRANSFORMERS_VERSION,
    pytorch_version=PYTORCH_VERSION,
    py_version=PYTHON_VERSION,
)

In [14]:
# Create the endpoint with the instance type and count from the setup cell.
predictor = huggingface_model.deploy(
    initial_instance_count=INSTANCE_COUNT,
    instance_type=INSTANCE_TYPE,
)

Creating model with name: huggingface-pytorch-inference-2023-02-10-17-36-31-386
CreateModel request: {
    "ModelName": "huggingface-pytorch-inference-2023-02-10-17-36-31-386",
    "ExecutionRoleArn": "arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628",
    "PrimaryContainer": {
        "Image": "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-cpu-py38-ubuntu20.04",
        "Environment": {
            "SAGEMAKER_PROGRAM": "",
            "SAGEMAKER_SUBMIT_DIRECTORY": "",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
            "SAGEMAKER_REGION": "us-east-1"
        },
        "ModelDataUrl": "s3://sagemaker-us-east-1-119174016168/model/ppo-pipeline/gpt2-ppo-pipeline.tar.gz"
    }
}
Creating endpoint-config with name huggingface-pytorch-inference-2023-02-10-17-36-31-991
Creating endpoint with name huggingface-pytorch-inference-2023-02-10-17-36-31-991


-----!

#### Invoke endpoint for inference and perform answer engineering

In [64]:
# Request payload sent to the endpoint; the prompt text goes under the 'inputs' key.
data = {'inputs': 'can covid spread through water?'}

In [65]:
def extract_answer(response: list) -> str:
    """Reduce a text-generation response to a clean, complete-sentence answer.

    Takes the text after the last ``'answer: '`` marker in the first result,
    upper-cases its first character (the rest of the text keeps its original
    casing) and drops a trailing sentence that was cut off mid-way.

    Args:
        response: List of result dicts; the first one is read and must
            carry a ``'generated_text'`` entry.

    Returns:
        The cleaned answer ending in a single period. If the text holds no
        complete sentence at all, the fragment is returned as-is. An empty
        response or an empty answer yields ``''``.
    """
    if not response:
        return ''

    text = response[0]['generated_text']
    ans = text.split('answer: ')[-1].strip()
    if not ans:
        return ''

    # str.capitalize() would also lower-case every remaining character and
    # mangle acronyms and proper nouns, so only the first character is touched.
    ans = ans[:1].upper() + ans[1:]

    # Already ends on a sentence boundary: nothing to trim, and no extra
    # period to append.
    if ans.endswith('.'):
        return ans

    # Everything after the last '. ' is an unfinished sentence; drop it.
    complete, separator, _unfinished = ans.rpartition('. ')
    if not separator:
        # No complete sentence at all: return the fragment, not a bare '.'.
        return ans
    return complete.rstrip() + '.'

In [66]:
# Send the payload to the deployed endpoint, then reduce the generated text to
# a clean answer; the bare name on the last line displays it as the cell output.
response = predictor.predict(data)
ans = extract_answer(response)
ans

'Currently there is no scientific evidence to suggest that covid-19 has been transmitted through chlorinated water.'

#### Delete endpoint (optional)

In [None]:
# Tear down the endpoint created by deploy() once you are done with it.
# NOTE(review): a running endpoint presumably keeps incurring charges; confirm
# before treating this step as optional.
predictor.delete_endpoint()