# Task Specific Transformer Model Demo

## English-to-Chinese Translation

- Model: `Helsinki-NLP/opus-mt-en-zh`
- Hugging Face: https://huggingface.co/Helsinki-NLP/opus-mt-en-zh


## Install Required Dependencies


In [None]:
%%sh

# Install or upgrade (-U) the SageMaker SDK, boto3, botocore and jsonlines with quiet pip output (-q).
python3 -m pip install sagemaker boto3 botocore jsonlines -Uq

## Deploy Hugging Face Model to SageMaker

Deploy the Hugging Face model to an Amazon SageMaker real-time inference endpoint


In [None]:
# Hugging Face Hub ID of the English-to-Chinese translation model used throughout
# the demonstration; it is passed to the model container as the HF_MODEL_ID setting.
HF_MODEL_ID = "Helsinki-NLP/opus-mt-en-zh"

In [None]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel

# Resolve the IAM role ARN to use with SageMaker. get_execution_role() is tried
# first; if it raises ValueError, the role named "sagemaker_execution_role" is
# looked up through the IAM API instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    client_iam = boto3.client("iam")
    role = client_iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# hub model configuration. https://huggingface.co/models
# HF_MODEL_ID names the model to load and HF_TASK the task ("translation")
hub = {"HF_MODEL_ID": HF_MODEL_ID, "HF_TASK": "translation"}

# create Hugging Face Model Class
# NOTE(review): the three version arguments presumably select the container image
# and must form a supported transformers/PyTorch/Python combination - confirm
# against the SageMaker SDK documentation before changing any of them.
huggingface_model = HuggingFaceModel(
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env=hub,
    role=role,
)

In [None]:
# deploy the model to a SageMaker real-time inference endpoint
# ("1x" in the original note presumably means: run this cell once per endpoint)

predictor = huggingface_model.deploy(
    initial_instance_count=1,  # number of instances
    instance_type="ml.g5.12xlarge",  # ec2 instance type
)

In [None]:
# display the name of the real-time inference endpoint created by deploy();
# this is the value to paste into SAGEMAKER_ENDPOINT when reusing the endpoint.
# The attribute is read directly instead of calling endpoint_context(), which
# performs a lineage lookup and is not guaranteed to return anything.

predictor.endpoint_name

In [None]:
%%time

# single-sentence smoke test of the deployed endpoint
sample_request = {"inputs": "A heart filled with anger has no room for love."}

predictor.predict(sample_request)

## Selecting an Existing SageMaker Endpoint


In [None]:
# name of an already-deployed real-time inference endpoint; replace the
# placeholder with your own endpoint name before running the cells below
SAGEMAKER_ENDPOINT = "<your_sagemaker_realtime_endpoint>"

In [None]:
from sagemaker.huggingface.model import HuggingFacePredictor

# SageMaker session created with default settings
session = sagemaker.session.Session()

# attach a predictor to the existing endpoint named by SAGEMAKER_ENDPOINT
# (no new deployment is made here)
predictor = HuggingFacePredictor(
    endpoint_name=SAGEMAKER_ENDPOINT, sagemaker_session=session
)

In [None]:
%%time

# same sample sentence, this time with explicit length parameters in the request
length_parameters = {"max_length": 1024, "min_length": 1}
sample_request = {
    "inputs": "A heart filled with anger has no room for love.",
    "parameters": length_parameters,
}

predictor.predict(sample_request)

## Using SageMaker Runtime Client for Inference

- Reference: <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker-runtime/client/invoke_endpoint.html>


In [None]:
%%time

import boto3

# SageMaker Runtime client: calls the endpoint through boto3 only
client_smr = boto3.client("sagemaker-runtime")

# JSON request body, encoded as UTF-8 bytes
request_body = '{"inputs": "A heart filled with anger has no room for love."}'.encode(
    "utf-8"
)

response = client_smr.invoke_endpoint(
    EndpointName=SAGEMAKER_ENDPOINT,
    ContentType="application/json",
    Body=request_body,
)

# read the response body, decode it as UTF-8, and print it
response_text = response["Body"].read().decode("utf-8")
print(response_text)

## Kaggle Dataset

`Quotes - 500k`

Description: "_The dataset is offered in CSV file format and contains three columns --- the quote, the author of the quote and the category tags for that quote. Examples of tags include --- love, life, philosophy, motivation, family, etc. These tags help in describing the various categories that a particular quote belongs to._"

Link: https://www.kaggle.com/datasets/manann/quotes-500k


In [None]:
import csv

# Character limit applied to the first CSV field (the quote text). It is a
# character-count stand-in for the model's token limit ("model_max_length":
# 512 tokens): longer quotes cause errors with model inference.
MAX_QUOTE_CHARS = 1024

# The context manager closes the file even if parsing fails; newline="" is the
# mode the csv module documents for files handed to csv.reader.
with open(
    "./_prelims/quotes_10k_clean.csv", "r", newline="", encoding="utf-8"
) as file:
    data = list(csv.reader(file, delimiter=","))

# Keep rows (the header row included) whose first field fits the limit.
# Blank lines parse to empty lists and are skipped instead of raising IndexError.
quotes = [row for row in data if row and len(row[0]) <= MAX_QUOTE_CHARS]

print(len(quotes))

In [None]:
# create lists of quotes of different sizes for testing
# skip headers on the first row

import json

# quote text is the first field of each row; index 0 of `quotes` is the header
# row, so the largest sample starts at index 1 and the smaller samples are
# prefixes of it
quotes_10k = [row[0] for row in quotes[1:10001]]
quotes_1k = quotes_10k[:1000]
quotes_100 = quotes_10k[:100]
quotes_10 = quotes_10k[:10]

# sanity check: size of the smallest sample and a preview of its first three quotes
print(len(quotes_10))
print(json.dumps(quotes_10[:3], ensure_ascii=False, indent=4))

In [None]:
# calculate count and mean length of quotes

# total characters across the 10k sample, the number of quotes, and the
# rounded mean quote length in characters
quote_count = len(quotes_10k)
total_length = sum(map(len, quotes_10k))

print(f"total length: {total_length}")
print(f"# of quotes: {quote_count}")
print(f"avg. length of quotes: {round(total_length / quote_count)}")

## Batch Inference using a Real-time Endpoint

Perform real-time inference on a list of quotes, sequentially, using a loop.


In [None]:
%%time

import time
import boto3

# Imported here so the cell works on its own and never depends on (or clobbers)
# the notebook-level `json` name.
import json

# NOTE: the region is pinned to us-east-1 in this cell; change it if the
# endpoint lives in another region.
client_smr = boto3.client("sagemaker-runtime", region_name="us-east-1")

# one {"input": <english quote>, "output": <translation>} dict per translated quote
translations = []
total_quotes = len(quotes_1k)

for idx, quote in enumerate(quotes_1k, start=1):
    try:
        response = client_smr.invoke_endpoint(
            EndpointName=SAGEMAKER_ENDPOINT,
            # json.dumps escapes quotation marks, backslashes and control
            # characters inside the quote, so the request body is always valid JSON
            Body=json.dumps({"inputs": quote}, ensure_ascii=False).encode("utf-8"),
            ContentType="application/json",
        )
    except client_smr.exceptions.ModelError as e:
        # best effort: report the failed quote and continue with the next one
        print(e)
    else:
        # the response body is JSON; parse it with json.loads rather than
        # evaluating service output as Python code
        response_dict = json.loads(response["Body"].read().decode("utf-8"))
        translation_text = response_dict[0]["translation_text"]
        translations.append({"input": quote, "output": translation_text})

    print(f"Translating quote: {idx}/{total_quotes}", end="\r")

In [None]:
import json

# number of collected translations, followed by a preview of the first three
# entries (ensure_ascii=False keeps non-ASCII characters unescaped in the output)
print(len(translations))
print(json.dumps(translations[0:3], ensure_ascii=False, indent=4))

## Batch Transforms

Instance type for batch transforms must be one of the following types:

```txt
'InstanceType': 'ml.m4.xlarge'|'ml.m4.2xlarge'|'ml.m4.4xlarge'|'ml.m4.10xlarge'|
'ml.m4.16xlarge'|'ml.c4.xlarge'|'ml.c4.2xlarge'|'ml.c4.4xlarge'|'ml.c4.8xlarge'|
'ml.p2.xlarge'|'ml.p2.8xlarge'|'ml.p2.16xlarge'|'ml.p3.2xlarge'|'ml.p3.8xlarge'|
'ml.p3.16xlarge'|'ml.c5.xlarge'|'ml.c5.2xlarge'|'ml.c5.4xlarge'|'ml.c5.9xlarge'|
'ml.c5.18xlarge'|'ml.m5.large'|'ml.m5.xlarge'|'ml.m5.2xlarge'|'ml.m5.4xlarge'|
'ml.m5.12xlarge'|'ml.m5.24xlarge'|'ml.g4dn.xlarge'|'ml.g4dn.2xlarge'|
'ml.g4dn.4xlarge'|'ml.g4dn.8xlarge'|'ml.g4dn.12xlarge'|'ml.g4dn.16xlarge'
```


In [None]:
# s3 bucket used to store input data and outputs from batch transforms;
# replace the placeholder with the name of your own bucket
S3_BUCKET = "<your_s3_bucket_name>"

### Write Quotes to JSON Lines Text Format


In [None]:
import os

import jsonlines


def _write_quotes_jsonl(path, quote_list):
    """Write one {"inputs": <quote>} JSON object per line to ``path``."""
    # make sure the target directory exists before opening the file
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with jsonlines.open(path, "w") as writer:
        writer.write_all({"inputs": quote} for quote in quote_list)


# one file per sample size, under ./quotes/ (the upload cell expects all four)
for label, quote_list in (
    ("10", quotes_10),
    ("100", quotes_100),
    ("1k", quotes_1k),
    ("10k", quotes_10k),
):
    _write_quotes_jsonl(f"./quotes/quotes_{label}.jsonl", quote_list)

# the 10k sample split into four files of up to 2,500 records each, under
# ./10k_quotes/ (the upload cell expects quotes_10k_1 .. quotes_10k_4)
CHUNK_SIZE = 2500
for part in range(4):
    chunk = quotes_10k[part * CHUNK_SIZE : (part + 1) * CHUNK_SIZE]
    _write_quotes_jsonl(f"./10k_quotes/quotes_10k_{part + 1}.jsonl", chunk)

### Copy JSON Lines Files to S3

The routine below will copy all input data files to your S3 bucket.


In [None]:
from sagemaker.s3 import S3Uploader, s3_path_join

# local JSON Lines files, grouped by the folder below input_batch/ in the
# bucket that they are uploaded to
uploads_by_folder = {
    "quotes": [
        "quotes/quotes_10.jsonl",
        "quotes/quotes_100.jsonl",
        "quotes/quotes_1k.jsonl",
        "quotes/quotes_10k.jsonl",
    ],
    "10k_quotes": [
        "10k_quotes/quotes_10k_1.jsonl",
        "10k_quotes/quotes_10k_2.jsonl",
        "10k_quotes/quotes_10k_3.jsonl",
        "10k_quotes/quotes_10k_4.jsonl",
    ],
}

for folder, local_paths in uploads_by_folder.items():
    # every file of a group shares the same destination prefix
    destination = s3_path_join("s3://", S3_BUCKET, "input_batch", folder)
    for local_path in local_paths:
        uploaded_uri = S3Uploader.upload(local_path, destination)
        print(f"{local_path} uploaded to {uploaded_uri}")

### Batch Transform with Hugging Face

References:

- <https://www.philschmid.de/sagemaker-inference-comparison>
- <https://huggingface.co/docs/sagemaker/inference#run-batch-transform-with--transformers-and-sagemaker>
- <https://discuss.huggingface.co/t/running-batch-transform-in-sagemaker-on-a-huggingface-model-from-the-hub-with-parameters/18390/3>


#### Multiple file batch of 10,000 quotes


In [None]:
%%time

import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel

# Resolve the IAM role ARN: get_execution_role() first, falling back to an IAM
# lookup of the role named "sagemaker_execution_role" when it raises ValueError.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    client_iam = boto3.client("iam")
    role = client_iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# Hub model configuration <https://huggingface.co/models>
hub = {"HF_MODEL_ID": HF_MODEL_ID, "HF_TASK": "translation"}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env=hub,
    role=role,
)

# results are written under output_batch/; the input is the whole 10k_quotes/
# prefix (trailing slash), i.e. every file uploaded into that folder
output_s3_path = f"s3://{S3_BUCKET}/output_batch"
s3_data_input = f"s3://{S3_BUCKET}/input_batch/10k_quotes/"

# create the transformer object that describes the batch transform:
# two instances, one record per request, output assembled line by line
batch_job = huggingface_model.transformer(
    accept="application/json",
    assemble_with="Line",
    instance_count=2,
    instance_type="ml.g4dn.8xlarge",
    output_path=output_s3_path,
    strategy="SingleRecord",
)

# starts batch transform job and uses S3 data as input;
# split_type="Line" treats each line of the input files as one record
batch_job.transform(
    content_type="application/json",
    data=s3_data_input,
    split_type="Line",
    logs=False,
)

### Execute Batch Transform using SageMaker Client

- <https://docs.aws.amazon.com/sagemaker/latest/dg/autopilot-deploy-models-batch.html>
- <https://github.com/huggingface/notebooks/blob/main/sagemaker/12_batch_transform_inference/sagemaker-notebook.ipynb>
- Troubleshooting: <https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-errors.html>


#### Single file batch of 1,000 quotes


In [None]:
import sagemaker
import boto3
import time

# NOTE: `session` and `role` are set up as in the other cells, but neither is
# passed to create_transform_job below.
session = sagemaker.session.Session()
client_sm = boto3.client("sagemaker", region_name="us-east-1")

try:
    role = sagemaker.get_execution_role()
except ValueError:
    client_iam = boto3.client("iam")
    role = client_iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# timestamp taken once and reused, so the job name is unique per run
epoch_time = int(time.time())

output_s3_path = f"s3://{S3_BUCKET}/output_batch"
s3_data_input = f"s3://{S3_BUCKET}/input_batch/quotes/quotes_1k.jsonl"
model_name = "<your_deployed_model_name>"
batch_job_name = f"quotes-batch-{epoch_time}-1k"

# launch batch transform job
response = client_sm.create_transform_job(
    TransformJobName=batch_job_name,
    ModelName=model_name,
    BatchStrategy="SingleRecord",
    TransformInput={
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_data_input,
            }
        },
        "ContentType": "application/json",  # file is jsonlines format, but avoiding error.
        "SplitType": "Line",
    },
    TransformOutput={
        "S3OutputPath": output_s3_path,
        "AssembleWith": "Line",
        "Accept": "application/json",
    },
    TransformResources={
        "InstanceType": "ml.p3.2xlarge",
        "InstanceCount": 1,
    },
)

# show the ARN of the job that was just created
print(response["TransformJobArn"])

#### Multiple file batch of 10,000 quotes


In [None]:
import sagemaker
import boto3
import time

# NOTE: `session` and `role` are set up here but neither is passed to
# create_transform_job below.
session = sagemaker.session.Session()
client_sm = boto3.client("sagemaker", region_name="us-east-1")

try:
    role = sagemaker.get_execution_role()
except ValueError:
    client_iam = boto3.client("iam")
    role = client_iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# the input is the whole 10k_quotes/ prefix, i.e. every file uploaded into that folder
output_s3_path = f"s3://{S3_BUCKET}/output_batch"
s3_data_input = f"s3://{S3_BUCKET}/input_batch/10k_quotes/"
model_name = "<your_deployed_model_name>"
# the current epoch time in the name keeps job names distinct between runs
batch_job_name = f"quotes-batch-{int(time.time())}-10k"

# launch batch transform job
response = client_sm.create_transform_job(
    TransformJobName=batch_job_name,
    ModelName=model_name,
    BatchStrategy="SingleRecord",
    TransformInput={
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_data_input,
            }
        },
        "ContentType": "application/json",
        "SplitType": "Line",
    },
    TransformOutput={
        "S3OutputPath": output_s3_path,
        "AssembleWith": "Line",
        "Accept": "application/json",
    },
    TransformResources={
        "InstanceType": "ml.g4dn.8xlarge",
        "InstanceCount": 2,
    },
)

# show the ARN of the job that was just created
print(response["TransformJobArn"])

#### Track Batch Transform Progress

Reference: <https://github.com/aws/amazon-sagemaker-examples/blob/main/sagemaker_batch_transform/pytorch_flores_batch_transform/sagemaker_batch_transform_torchserve.ipynb>


In [None]:
%%time

# imported here so the cell does not depend on another cell having imported it
import time

# seconds to wait between status checks
POLL_INTERVAL_SECONDS = 30

# Poll the most recently launched job (batch_job_name) until it reaches a
# terminal status. "Stopped" is terminal as well: without handling it, a job
# that was stopped would keep this loop running forever.
while True:
    response = client_sm.describe_transform_job(TransformJobName=batch_job_name)
    status = response["TransformJobStatus"]
    if status in ("Completed", "Stopped"):
        print(f"Transform job ended with status: {status}")
        break
    if status == "Failed":
        # tolerate a missing FailureReason so the real failure is still reported
        message = response.get("FailureReason", "<no failure reason reported>")
        print(f"Transform failed with the following error: {message}")
        raise RuntimeError("Transform job failed")
    print(f"Transform job is still in status: {status}", end="\r")
    time.sleep(POLL_INTERVAL_SECONDS)