In [None]:
# NOTE(review): this cell references `docker_repo`, `docker_tag` and `image_uri`,
# which are only assigned further down in this notebook (section
# "Build a TorchServe Docker Image"). On a fresh kernel they are not defined yet
# at this point, and the same build/push commands are repeated later once the
# variables exist. TODO confirm whether this cell is a leftover that can be dropped.
!docker build -t $docker_repo:$docker_tag -f ./docker/Dockerfile ./docker

!docker push $image_uri


# NOTE:  THIS NOTEBOOK WILL TAKE 5-10 MINUTES TO COMPLETE.

# PLEASE BE PATIENT.

# Serving a PyTorch Model as a REST Endpoint with TorchServe and SageMaker

We will deploy our BERT PyTorch Model as a REST Endpoint on SageMaker using TorchServe https://github.com/pytorch/serve/

TorchServe can be used for many types of inference in production settings. It provides an easy-to-use command line interface and utilizes REST-based APIs to handle prediction requests.

<img src="./img/torchserve.png" width="90%">
  
More information on how to deploy Huggingface Transformers with TorchServe:
* https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers
* https://medium.com/analytics-vidhya/deploy-huggingface-s-bert-to-production-with-pytorch-serve-27b068026d18 

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, IAM execution role and default S3 bucket used below.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()

# Caller's AWS account id and region (needed later to build the ECR image URI).
account_id = boto3.client('sts').get_caller_identity().get('Account')
region = boto3.Session().region_name

# Low-level SageMaker client, used further down to wait on the endpoint.
sm = boto3.Session().client(
    service_name='sagemaker',
    region_name=region,
)

# PRE-REQUISITE: 

## You need to have successfully run the notebooks in the `TRAINING` section and converted your TF model into PyTorch before you continue with this notebook. 

In [None]:
%store -r training_job_name

In [None]:
# Verify that `%store -r` restored `training_job_name`; evaluating the bare name
# raises NameError when it is missing from the namespace.
try:
    training_job_name
except NameError:
    for message in (
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run the notebooks in the previous TRAIN section before you continue.',
        '+++++++++++++++++++++++++++++++',
    ):
        print(message)
else:
    print('[OK]')

In [None]:
print(training_job_name)

In [None]:
%store -r transformer_pytorch_model_s3_uri

In [None]:
# Verify that `%store -r` restored `transformer_pytorch_model_s3_uri`; evaluating
# the bare name raises NameError when it is missing from the namespace.
try:
    transformer_pytorch_model_s3_uri
except NameError:
    for message in (
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run the notebooks in the previous TRAIN section before you continue.',
        '+++++++++++++++++++++++++++++++',
    ):
        print(message)
else:
    print('[OK]')

In [None]:
print(transformer_pytorch_model_s3_uri)

# Copy the Transformer PyTorch Model from S3 to Local

In [None]:
local_model_dir = './models/transformers/pytorch/'

In [None]:
!aws s3 cp --recursive $transformer_pytorch_model_s3_uri $local_model_dir

# Retrieve Transformer PyTorch Model Name (.bin) Created During Training

In [None]:
%store -r transformer_pytorch_model_name

In [None]:
# Verify that `%store -r` restored `transformer_pytorch_model_name`; evaluating
# the bare name raises NameError when it is missing from the namespace.
try:
    transformer_pytorch_model_name
except NameError:
    for message in (
        '+++++++++++++++++++++++++++++++',
        '[ERROR] Please run the notebooks in the previous TRAIN section before you continue.',
        '+++++++++++++++++++++++++++++++',
    ):
        print(message)
else:
    print('[OK]')

In [None]:
print(transformer_pytorch_model_name)

# Create TorchServe Model Archive File (.mar)

https://github.com/pytorch/serve/blob/master/model-archiver/README.md

A key feature of TorchServe is the ability to package all model artifacts into a single model archive file. It is a separate command line interface (CLI), torch-model-archiver, that can take model checkpoints or model definition file with state_dict, and package them into a .mar file. This file can then be redistributed and served by anyone using TorchServe. It takes in the following model artifacts: a model checkpoint file in case of torchscript or a model definition file and a state_dict file in case of eager mode, and other optional assets that may be required to serve the model. The CLI creates a .mar file that TorchServe's server CLI uses to serve the models. 

We need to pass the following:
* `--handler`:  Python code to adapt the `review_body` to BERT tokens (request handler) as well as the `star_rating` response of 1-5 (response handler)
* `config.json`:  used by the Huggingface transformers library when we saved the model in a previous notebook.
* `setup_config.json`:  BERT-specific `setup_config.json` that defines the `max seq length`, `number of output classes` (1-5), etc.
* `Seq_classification_artifacts/index_to_name.json`:  BERT-specific mapping of response index (0-4) to class name (1-5 star rating) for our output classes

In [None]:
!mkdir -p ./model_store

In [None]:
# Package the PyTorch checkpoint, the request/response handler and the extra
# config files listed below into ./model_store/model.mar.
# NOTE(review): `-f` presumably forces overwriting an existing archive — confirm
# against the torch-model-archiver README linked above.
!torch-model-archiver -f \
    --model-name model \
    --export-path ./model_store/ \
    --version 1.0 \
    --serialized-file $local_model_dir/$transformer_pytorch_model_name \
    --handler ./src_torchserve/Transformer_handler_generalized.py \
    --extra-files "./models/transformers/pytorch/config.json,./src_torchserve/setup_config.json,./src_torchserve/Seq_classification_artifacts/index_to_name.json"

In [None]:
!ls -al ./model_store/

# Start TorchServe locally to serve the model

After you archive and store the model, use the torchserve command to serve the model.

# Prepare the Model for SageMaker Deployment

To deploy the model to a SageMaker REST endpoint, we need to upload our .mar file to S3 and build a TorchServe model container. 

In [None]:
!unzip -o ./model_store/model.mar

# Upload TorchServe Model Archive File to S3

In [None]:
torchserve_mar = 'model.mar'

# Tar the `.mar` Archive File as `model.tar.gz` and Upload to S3
Per TorchServe convention, the `.mar` file must be under ./model_store/ in the `.tar` archive

In [None]:
!mkdir -p ./tmp/
!tar -cvzf ./tmp/model.tar.gz \
    ./model_store/$torchserve_mar

In [None]:
tmp_torchserve_model_name = 'reviews-distilbert-pytorch'

print(tmp_torchserve_model_name)

In [None]:
# S3 destination for the TorchServe tarball inside the default SageMaker bucket.
# The key is the same for every run (it does not include
# `tmp_torchserve_model_name`), so re-running overwrites the previous upload.
tmp_torchserve_tar_s3_uri = f's3://{bucket}/models/torchserve/model.tar.gz'

print(tmp_torchserve_tar_s3_uri)

# Upload `model.tar.gz` to S3

In [None]:
!aws s3 cp ./tmp/model.tar.gz $tmp_torchserve_tar_s3_uri

In [None]:
print(tmp_torchserve_tar_s3_uri)

In [None]:
!aws s3 ls $tmp_torchserve_tar_s3_uri

# Build a TorchServe Docker Image

In [None]:
!pygmentize ./docker/Dockerfile

In [None]:
docker_repo = 'torchserve'
docker_tag = 'torch-1.5.0-1.0.0'

image_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{docker_repo}:{docker_tag}'

In [None]:
!docker build -t $docker_repo:$docker_tag -f ./docker/Dockerfile ./docker

# Check the Docker Image
If the image did not build properly, re-run the cell above.

In [None]:
!docker inspect $docker_repo:$docker_tag

# Push the Image to a Private Docker Repo (Amazon ECR)

In [None]:
import boto3

# Rebuild the fully-qualified ECR image URI from the caller's region and account.
region = boto3.session.Session().region_name
account_id = boto3.client('sts').get_caller_identity().get('Account')

image_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{docker_repo}:{docker_tag}'
print(image_uri)

In [None]:
!$(aws ecr get-login --region $region --registry-ids $account_id --no-include-email)

### Ignore the `RepositoryNotFoundException` Error Below

In [None]:
!aws ecr describe-repositories --repository-names $docker_repo || aws ecr create-repository --repository-name $docker_repo

In [None]:
!docker tag $docker_repo:$docker_tag $image_uri

In [None]:
!docker push $image_uri

## Create SageMaker Endpoint and Deploy TorchServe Model Container

In [None]:
import time
timestamp = int(time.time())

pytorch_model_name = '{}-{}-{}'.format(training_job_name, 'pt', timestamp)

print(pytorch_model_name)

In [None]:
from sagemaker.model import Model
from sagemaker.predictor import Predictor

pytorch_model = Model(image_uri=image_uri,
                      model_data=tmp_torchserve_tar_s3_uri,                       
                      role=role,
                      predictor_cls=Predictor,
                      name=pytorch_model_name)

In [None]:
%store pytorch_model_name

In [None]:
import time

pytorch_endpoint_name = '{}-{}-{}'.format(training_job_name, 'pt', timestamp)

In [None]:
print(pytorch_endpoint_name)

predictor = pytorch_model.deploy(instance_type='ml.m5.large',
                                 initial_instance_count=1,
                                 endpoint_name=pytorch_endpoint_name,
                                 wait=False)

In [None]:
# `display` and `HTML` belong to the public `IPython.display` API; importing them
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Render a link to the endpoint's page in the SageMaker console.
# `target="_blank"` (with the underscore) is the keyword that opens a new tab.
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}">REST Endpoint</a></b>'.format(region, pytorch_endpoint_name)))


# _Wait Until the Endpoint is Deployed_

In [None]:
%%time

waiter = sm.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=pytorch_endpoint_name)

# _Wait Until the ^^ Endpoint ^^ is Deployed_

### Waiting for the Endpoint to be ready to Serve Predictions

In [None]:
import time

time.sleep(30)

# Predict the `star_rating` with `review_body` Samples from our TSV's

In [None]:
import csv

# Load the raw reviews TSV. QUOTE_NONE makes pandas treat quote characters in the
# review text as ordinary characters instead of field quoting.
df_reviews = pd.read_csv('./data/amazon_reviews_us_Digital_Software_v1_00.tsv.gz', 
                         delimiter='\t', 
                         quoting=csv.QUOTE_NONE,
                         compression='gzip')

# Pick 50 reviews to send to the endpoint.
# Rows without a review body are dropped first, because a missing value (NaN)
# cannot be sent as a request payload; the fixed seed makes the sample
# reproducible across "Restart & Run All".
df_sample_reviews = (
    df_reviews[['review_body', 'star_rating']]
    .dropna(subset=['review_body'])
    .sample(n=50, random_state=42)
    .reset_index()
)
df_sample_reviews.shape

In [None]:
import pandas as pd

def predict(review_body):
    """Send one review text to the deployed endpoint and return its response decoded as UTF-8."""
    raw_response = predictor.predict(review_body)
    return raw_response.decode('utf-8')

df_sample_reviews['predicted_class'] = df_sample_reviews['review_body'].map(predict)
df_sample_reviews.head(5)

# Predict the `star_rating` with Ad Hoc `review_body` Samples

In [None]:
predicted_classes = predictor.predict('This is great!')

print(predicted_classes.decode('utf-8'))

In [None]:
# NOTE(review): `_model_names` is a private attribute of the predictor (leading
# underscore) — presumably the SageMaker model(s) behind the endpoint; confirm it
# is still populated in the installed SageMaker SDK version.
list(predictor._model_names)

# Save for Next Notebook(s)

In [None]:
%store pytorch_endpoint_name

In [None]:
%store

## Delete Endpoint
To save money, we should delete the endpoint.

In [None]:
# sm.delete_endpoint(
#     EndpointName=pytorch_endpoint_name
# )

In [None]:
%%javascript
// Save a checkpoint of this notebook, then delete its session.
// NOTE(review): relies on the `Jupyter` global — presumably only available in the
// classic Notebook front end; confirm behaviour under JupyterLab.
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();