# Deploy Hugging Face transformer models with multi-model endpoints 

This notebook is a step-by-step tutorial on deploying multiple pre-trained PyTorch Hugging Face models behind a single multi-model endpoint on Amazon SageMaker. 

We will describe the steps for deploying a multi-model endpoint on Amazon SageMaker with the TorchServe serving stack. Compared to a single-model deployment, there is one additional step: a manifest file must be created for each model prior to deployment. For training Hugging Face models on SageMaker, refer to the examples [here](https://github.com/huggingface/notebooks/tree/master/sagemaker).

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.utils import name_from_base
from sagemaker.pytorch import PyTorchModel
import boto3
import torch
import time
import pandas as pd
from pathlib import Path
import tarfile
import shutil 
import datetime

# Execution role of this notebook; passed to SageMaker when the model is created below.
role = get_execution_role()
# Region of the default boto3 session; used for the SageMaker client and
# for the SAGEMAKER_REGION container environment variable.
region = boto3.Session().region_name
session = sagemaker.Session()
# Default bucket of the SageMaker session; repackaged model archives are uploaded here.
bucket = session.default_bucket()
# Low-level clients: SageMaker control plane (model registry, models) and S3.
sm_client = boto3.client("sagemaker", region)
s3_client = boto3.client('s3')
# Key prefix under which this notebook stores its model archives in the bucket.
prefix = "sagemaker/huggingface-pytorch-sentiment-analysis"

In [None]:
# List the variables currently held in the IPython store, then restore them
# into this kernel.
# NOTE(review): `model_package_group_name` and `model_roberta_uri`, used below,
# are not defined in this notebook and are presumably restored here — confirm
# they were stored by an earlier notebook.
%store
%store -r

In [None]:
# Check that the model package group exists in the SageMaker Model Registry
# before registering a new model version in it.
try:
    describe_model_package_group_response = sm_client.describe_model_package_group(
        ModelPackageGroupName=model_package_group_name
    )
except sm_client.exceptions.ClientError as error:
    # Only service-side errors are handled here. Anything else (an undefined
    # variable, a keyboard interrupt, a connectivity problem) propagates
    # instead of being misreported as a missing group.
    print(f"model package group {model_package_group_name} does not exist")
    print(f"reason: {error}")
else:
    print(describe_model_package_group_response)

### Register a new model version for the Hugging Face RoBERTa model with an entry point script, using a helper function
We will first download the RoBERTa model archive and repackage the model together with the inference script to be used by the endpoint.

In [None]:
# Local working directory that holds the extracted model files and the
# inference code to be packaged.
local_artifact_path = Path("model_artifacts")
# exist_ok=True makes re-running this cell harmless.
local_artifact_path.mkdir(exist_ok=True, parents=True)
# File name of the repackaged archive that is built and uploaded further below.
model_tar_name = 'model_roberta_update.tar.gz'

In [None]:
# Download the existing roberta archive into the current working directory.
# "s3://<bucket>/<key...>" splits into ['s3:', '', '<bucket>', '<key part>', ...],
# so both the bucket and the key are taken from the URI itself rather than
# assuming the archive lives in the session's default bucket.
uri_parts = model_roberta_uri.split('/')
source_bucket = uri_parts[2]
source_key = '/'.join(uri_parts[3:])
# The local file keeps the archive's original file name.
local_archive_name = uri_parts[-1]
s3_client.download_file(source_bucket, source_key, local_archive_name)

In [None]:
# Extract the archive that was downloaded from S3. The download is saved under
# the last path component of `model_roberta_uri`, which is not necessarily
# `model_tar_name` (the name of the archive that is built later), so the file
# name is derived from the URI here.
downloaded_archive = model_roberta_uri.split('/')[-1]
with tarfile.open(downloaded_archive) as tar:
    # Extract into the artifact directory itself; `.stem` would drop parent
    # directories and any suffix if the path were ever changed.
    # NOTE(review): on Python 3.12+ consider passing filter="data" when the
    # archive does not come from a trusted location.
    tar.extractall(path=local_artifact_path)

In [None]:
# Copy the inference code next to the extracted model files.
# copytree refuses to write into an existing directory, so a stale copy
# (from a previous run of this cell, or shipped inside the archive) is removed
# first to keep the cell re-runnable.
code_destination = local_artifact_path / 'code'
if code_destination.exists():
    shutil.rmtree(code_destination)
shutil.copytree('../code', code_destination)

In [None]:
def create_tar(tarfile_name: str, local_path: Path) -> float:
    """
    Create a tar.gz archive with the content of `local_path`.

    Every regular file below `local_path` is added under its path relative to
    `local_path`. Files are skipped when any component of that relative path
    starts with "." (for example `.ipynb_checkpoints`), and the archive itself
    is skipped if it happens to be written inside `local_path`.

    Args:
        tarfile_name: Path of the archive to create; an existing file is overwritten.
        local_path: Directory whose content is archived.

    Returns:
        Size of the created archive in megabytes (10**6 bytes).
    """
    local_path = Path(local_path)
    archive_path = Path(tarfile_name).resolve()
    with tarfile.open(tarfile_name, mode="w:gz") as archive:
        # Sorted traversal keeps the member order stable between runs.
        for entry in sorted(local_path.rglob("*")):
            # Only files are added explicitly: adding a directory would pull in
            # its whole subtree again and bypass the hidden-file filter.
            if not entry.is_file():
                continue
            relative = entry.relative_to(local_path)
            if any(part.startswith(".") for part in relative.parts):
                continue
            if entry.resolve() == archive_path:
                continue
            archive.add(entry, arcname=relative.as_posix())
    return Path(tarfile_name).stat().st_size / 10**6

In [None]:
# Package the artifact directory (model files plus inference code) into the
# archive named by `model_tar_name`; create_tar returns its size in MB.
tar_size = create_tar(model_tar_name, local_artifact_path)
print(f"Created {model_tar_name}, size {tar_size:.2f} MB")

In [None]:
from sagemaker.s3 import S3Uploader, s3_path_join

# S3 location that holds the model archives; it is also used further below as
# the model data prefix of the multi-model endpoint.
model_data_path = s3_path_join("s3://", bucket, f"{prefix}/models")

# Upload the repackaged archive. From here on `model_roberta_uri` refers to the
# uploaded archive rather than to the one downloaded earlier.
model_roberta_uri = S3Uploader.upload(model_tar_name, model_data_path)
print(f"Uploaded roberta model to {model_roberta_uri}")

In [None]:
# List the model versions currently registered in the model package group.
# NOTE(review): only a single page of results is requested; confirm the group
# is small enough that no pagination is needed.
list_model_packages_response = sm_client.list_model_packages(
    ModelPackageGroupName=model_package_group_name
)
# Bare expression: shown as the cell output in the notebook.
list_model_packages_response

In [None]:
# Collect the ARNs of all registered versions whose description mentions
# "roberta" (case-insensitive). The description is optional in a model package
# summary, so a missing one is treated as an empty string instead of raising
# KeyError.
roberta_model_version_arn = [
    summary['ModelPackageArn']
    for summary in list_model_packages_response['ModelPackageSummaryList']
    if "roberta" in summary.get('ModelPackageDescription', '').lower()
]

In [None]:
# Fetch the full description of the first matching roberta model version; its
# image, framework, environment and instance types are reused as the template
# for the new version registered below.
# Raises IndexError if no description in the group mentions "roberta".
describe_model_package_response = sm_client.describe_model_package(
    ModelPackageName=roberta_model_version_arn[0]
)
# Bare expression: shown as the cell output in the notebook.
describe_model_package_response

In [None]:
# Register the repackaged roberta archive as a new model version. Everything
# except the archive location, the entry point and the region is copied from
# the model version described above.
source_inference_spec = describe_model_package_response['InferenceSpecification']
source_container = source_inference_spec['Containers'][0]
source_environment = source_container['Environment']

roberta_update_environment = {
    "SAGEMAKER_CONTAINER_LOG_LEVEL": source_environment['SAGEMAKER_CONTAINER_LOG_LEVEL'],
    # Entry point script, expected inside the uploaded archive.
    "SAGEMAKER_PROGRAM": "inference.py",
    "SAGEMAKER_REGION": region,
    "SAGEMAKER_SUBMIT_DIRECTORY": model_roberta_uri,
    "HF_TASK": source_environment['HF_TASK'],
}

roberta_update_container = {
    "ContainerHostname": "huggingface-pytorch-roberta-update",
    "Image": source_container['Image'],
    "ModelDataUrl": model_roberta_uri,
    "Framework": source_container['Framework'],
    "NearestModelName": source_container['NearestModelName'],
    "Environment": roberta_update_environment,
}

roberta_update_inference_spec = {
    "Containers": [roberta_update_container],
    "SupportedRealtimeInferenceInstanceTypes": source_inference_spec['SupportedRealtimeInferenceInstanceTypes'],
    "SupportedContentTypes": ["text/csv"],
    "SupportedResponseMIMETypes": ["application/json"],
}

model_package_response = sm_client.create_model_package(
    ModelPackageGroupName=str(model_package_group_name),
    ModelPackageDescription=f"Hugging Face Roberta Model MME - sentiment analysis",
    Domain=describe_model_package_response['Domain'],
    Task=describe_model_package_response['Task'],
    InferenceSpecification=roberta_update_inference_spec,
)

In [None]:
# List the model versions in the group again so that the response includes the
# version that was just registered.
list_model_packages_response = sm_client.list_model_packages(
    ModelPackageGroupName=model_package_group_name
)
# Bare expression: shown as the cell output in the notebook.
list_model_packages_response

In [None]:
# Pick the model versions by their position in the listing.
# NOTE(review): this assumes the first entry is the roberta version registered
# above and the second entry is the distilbert version. That depends on the
# service's default sort order and on which versions exist in the group —
# verify against the printed listing before approving.
# `roberta_model_version_arn` is rebound here from a list of ARNs to a single ARN string.
roberta_model_version_arn = list_model_packages_response["ModelPackageSummaryList"][0]["ModelPackageArn"]
print("roberta model: {}".format(roberta_model_version_arn))
distilbert_model_version_arn = list_model_packages_response["ModelPackageSummaryList"][1]["ModelPackageArn"]
print("distilbert model: {}".format(distilbert_model_version_arn))

In [None]:
# Set the approval status of the selected roberta model version to "Approved".
model_package_update_input_dict = {
    "ModelPackageArn": roberta_model_version_arn,
    "ModelApprovalStatus": "Approved",
}
model_package_update_response = sm_client.update_model_package(**model_package_update_input_dict)
# Bare expression: shown as the cell output in the notebook.
model_package_update_response

In [None]:
# Build a model name with a timestamp suffix (local time, second resolution)
# so repeated runs do not reuse the same name.
roberta_update_model_name = "hf-pytorch-model-roberta-update-" + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
print("Model name : {}".format(roberta_update_model_name))
# Persist the name in the IPython store so other notebooks can restore it.
%store roberta_update_model_name

In [None]:
# Create a SageMaker model directly from the registered model version: the
# container definition only references the model package ARN.
primary_container_roberta = {
    "ModelPackageName": roberta_model_version_arn,
}

# NOTE: the variable name below is spelled "respose"; it is kept unchanged
# because other cells may refer to it.
create_model_roberta_respose = sm_client.create_model(
    ModelName=roberta_update_model_name, 
    ExecutionRoleArn=role, 
    PrimaryContainer=primary_container_roberta
)

print("Model arn : {}".format(create_model_roberta_respose["ModelArn"]))

In [None]:
# Persist the model name in the IPython store.
# NOTE(review): the same variable is already stored in the cell that defines
# it; this repeat looks redundant — confirm before removing.
%store roberta_update_model_name

In [None]:
# Reuse the container image of the model version described earlier for the
# multi-model endpoint.
image_uri = describe_model_package_response['InferenceSpecification']['Containers'][0]['Image']
# Bare expression: shown as the cell output in the notebook.
image_uri 

In [None]:
# Deploy on the second instance type listed as supported for real-time
# inference by the described model version.
# Raises IndexError if that list has fewer than two entries.
deploy_instance_type = describe_model_package_response['InferenceSpecification']['SupportedRealtimeInferenceInstanceTypes'][1]

In [None]:
from sagemaker.model import Model
from sagemaker.multidatamodel import MultiDataModel
import time

# Placeholder model: it carries only the container image and the role, with an
# empty model_data. The individual archives are picked per request from
# `model_data_path` through the multi-model wrapper below.
dummy_model = Model(
    name='dummy_model_pt',
    image_uri=image_uri,
    role=role,
    model_data='',
)

# Multi-model wrapper pointing at the S3 prefix that holds the model archives.
multi_model = MultiDataModel(
    name='pytorch-multi-model-senti-' + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()),
    model=dummy_model,
    model_data_prefix=model_data_path,
)

# Timestamped (UTC) endpoint name so repeated runs do not collide.
endpoint_name = 'hf-pytorch-multimodel-senti-endpoint-' + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

predictor = multi_model.deploy(
    instance_type=deploy_instance_type,
    initial_instance_count=1,
    endpoint_name=endpoint_name,
)

In [None]:
# from sagemaker.predictor import Predictor
# from sagemaker.serializers import CSVSerializer, JSONSerializer
# from sagemaker.deserializers import JSONDeserializer
# pred = Predictor(endpoint_name)
# pred.serializer = sagemaker.serializers.CSVSerializer()
# pred.deserializer = sagemaker.deserializers.JSONDeserializer()
# pred.predict(test_data.to_csv(header=False, index=False),target_model=model_archive)

In [None]:
# Load the sample payload. header=None means the first row is treated as data,
# not as column names.
test_data = pd.read_csv("../sample_payload/test_data.csv", header=None)
# Bare expression: shown as the cell output in the notebook.
test_data

In [None]:
# Runtime client used to invoke the endpoint. No region is passed, so the
# default boto3 configuration applies.
sm_runtime = boto3.client("sagemaker-runtime")

In [None]:
%%time
# Invoke the multi-model endpoint with the sample payload as CSV (no header,
# no index column), targeting the repackaged roberta archive.
# NOTE(review): the leading "/" in the target presumably compensates for
# `model_data_path` having no trailing slash — confirm how the endpoint joins
# the model data prefix and TargetModel.
model_archive = '/model_roberta_update.tar.gz'
response = sm_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=test_data.to_csv(header=False, index=False),
    ContentType="text/csv",
    TargetModel=model_archive,
)

# Print the raw response body as returned by the endpoint.
print(response["Body"].read())

In [None]:
%%time
# Invoke the same endpoint with the same payload, this time targeting the
# distilbert archive.
# NOTE(review): this archive is not created in this notebook; it presumably
# already exists under `model_data_path` — confirm.
model_archive = '/model_distilbert.tar.gz'
response = sm_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=test_data.to_csv(header=False, index=False),
    ContentType="text/csv",
    TargetModel=model_archive,
)

# Print the raw response body as returned by the endpoint.
print(response["Body"].read())