In [80]:
%%capture
%load_ext dotenv
%dotenv
%pip install "sagemaker>=2.163.0"
!huggingface-cli login --token $HUGGING_FACE_TOKEN

In [15]:
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
from pprint import pprint


In [83]:
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
import time
import boto3

sts_client = boto3.client("sts")
region = "us-east-1"

sagemaker_role = "arn:aws:iam::643713846674:role/sagemaker-executor"
session_name = "sagemaker-executor-session"

response = sts_client.assume_role(RoleArn=sagemaker_role, RoleSessionName=session_name)

credentials = response["Credentials"]

session = boto3.Session(
    aws_access_key_id=credentials["AccessKeyId"],
    aws_secret_access_key=credentials["SecretAccessKey"],
    aws_session_token=credentials["SessionToken"],
)
pprint(session)

sagemaker_client = session.client("sagemaker")
sagemaker_session = sagemaker.Session()

Session(region_name='us-east-1')


In [24]:
import sagemaker

# sagemaker_session = sagemaker.Session()
# region = sagemaker_session.boto_region_name
role = sagemaker_session.get_execution_role()
print(f"{role}")

AttributeError: 'Session' object has no attribute 'get_execution_role'

In [54]:
huggingface_image_uri = get_huggingface_llm_image_uri(
  backend="huggingface", # or lmi
  region=region
)
pprint(huggingface_image_uri)

'763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4.2-gpu-py310-cu121-ubuntu22.04'


In [55]:
# model_name = "grammarly/coedit-large"
model_name = "grammarly-coedit-large-2-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

hub = {
    # "HF_MODEL_ID": "EleutherAI/gpt-neox-20b",
    "HF_MODEL_ID": "grammarly/coedit-large",
    "HF_TASK": "text-generation",
    "SM_NUM_GPUS": "1",
    "HF_MODEL_QUANTIZE": "bitsandbytes",
}

model = HuggingFaceModel(name=model_name, env=hub, role=sagemaker_role, image_uri=huggingface_image_uri)
pprint(model)

<sagemaker.huggingface.model.HuggingFaceModel object at 0x7f50f9cc3e30>


In [53]:
from sagemaker import image_uris

pytorch_inference_image = image_uris.retrieve(
    framework="pytorch",
    region=region,
    version="2.1",
    # py_version="py3",
    instance_type="ml.g4dn.2xlarge",
    image_scope="inference",
)

print(pytorch_inference_image)

763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:2.1-gpu-py310


In [71]:
from sagemaker import get_execution_role, Session, image_uris
import pandas as pd
import time
import os
from pprint import pprint
from transformers import AutoTokenizer, T5ForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer, T5Tokenizer, pipeline

export_dir = "./model/"

if not os.path.exists(export_dir):
    os.makedirs(export_dir)
    print("Directory ", export_dir, " Created ")
else:
    print("Directory ", export_dir, " already exists")

# coedit_large_tokenizer = AutoTokenizer.from_pretrained("grammarly/coedit-large")
# coedit_large_model = T5ForConditionalGeneration.from_pretrained("grammarly/coedit-large")
coedit_large_pipeline = pipeline("sentiment-analysis")

coedit_large_pipeline.save_pretrained("./model")

model_archive_name = "hf-model.tar.gz"
payload_archive_name = "hf_payload.tar.gz"

# !cd model && tar -cvpzf ../{model_archive_name} *
# !cd model/sample-payload && tar czvf ../../{payload_archive_name} *

bucket = sagemaker.Session().default_bucket()

prefix = "sagemaker/huggingface-pytorch-inference-recommender"

sample_payload_url = sagemaker.Session().upload_data(
    payload_archive_name, bucket=bucket, key_prefix=prefix + "/inference"
)
model_url = sagemaker.Session().upload_data(
    model_archive_name, bucket=bucket, key_prefix=prefix + "/grammarly-coedit-large/model"
)


print(sample_payload_url)
print(model_url)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Directory  ./model/  already exists
s3://sagemaker-us-east-1-643713846674/sagemaker/huggingface-pytorch-inference-recommender/inference/hf_payload.tar.gz
s3://sagemaker-us-east-1-643713846674/sagemaker/huggingface-pytorch-inference-recommender/grammarly-coedit-large/model/hf-model.tar.gz


In [77]:
import boto3

client = boto3.client("sagemaker", region)

model_package_group_name = "huggingface-pytorch-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
print(model_package_group_name)

model_pacakge_group_response = client.create_model_package_group(
    ModelPackageGroupName=str(model_package_group_name),
    ModelPackageGroupDescription="My sample HuggingFace PyTorch model package group",
)

print(model_pacakge_group_response)

ml_domain = "NATURAL_LANGUAGE_PROCESSING"
ml_task = "FILL_MASK"
ml_framework = "PYTORCH"
framework_version = "1.6.0"
model = "grammarly-coedit-large"

model_package_version_response = client.create_model_package(
    ModelPackageGroupName=str(model_package_group_name),
    ModelPackageDescription="HuggingFace PyTorch Inference Recommender Demo",
    Domain=ml_domain,
    Task=ml_task,
    SamplePayloadUrl=sample_payload_url,
    InferenceSpecification={
        "Containers": [
            {
                "ContainerHostname": "huggingface-pytorch",
                "Image": huggingface_image_uri,
                "ModelDataUrl": model_url,
                "Framework": ml_framework,
                "NearestModelName": model,
                "Environment": {
                    "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
                    "SAGEMAKER_PROGRAM": "inference.py",
                    "SAGEMAKER_REGION": region,
                    "SAGEMAKER_SUBMIT_DIRECTORY": model_url,
                },
            },
        ],
        "SupportedRealtimeInferenceInstanceTypes": [
            "ml.g5.xlarge",
            "ml.g4dn.xlarge",
        ],
        "SupportedContentTypes": ["text/csv"],
        "SupportedResponseMIMETypes": ["text/csv"],
    },
)

pprint(model_package_version_response)

huggingface-pytorch-2024-04-08-00-40-32
{'ModelPackageGroupArn': 'arn:aws:sagemaker:us-east-1:643713846674:model-package-group/huggingface-pytorch-2024-04-08-00-40-32', 'ResponseMetadata': {'RequestId': '6cb9c8b4-ed73-4f06-8cb0-343bb45a73dd', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6cb9c8b4-ed73-4f06-8cb0-343bb45a73dd', 'content-type': 'application/x-amz-json-1.1', 'content-length': '127', 'date': 'Mon, 08 Apr 2024 00:40:32 GMT'}, 'RetryAttempts': 0}}
{'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:643713846674:model-package/huggingface-pytorch-2024-04-08-00-40-32/1',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '118',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Mon, 08 Apr 2024 00:40:32 GMT',
                                      'x-amzn-requestid': 'd7f39b37-9edc-4592-b53e-9c444df30032'},
                      'HTTPStatusCode': 200,
                      'RequestId':

In [84]:
import boto3
import datetime

default_job = "huggingface-pytorch-basic-recommender-job-" + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
default_response = sagemaker_client.create_inference_recommendations_job(
    JobName=str(default_job),
    JobDescription="HuggingFace PyTorch Inference Basic Recommender Job",
    JobType="Default",
    RoleArn=sagemaker_role,
    InputConfig={"ModelPackageVersionArn": model_package_version_response["ModelPackageArn"]},
)

print(default_response)

{'JobArn': 'arn:aws:sagemaker:us-east-1:643713846674:inference-recommendations-job/huggingface-pytorch-basic-recommender-job-2024-04-07-20-44-22', 'ResponseMetadata': {'RequestId': 'a8a0d26a-4761-4676-922a-ba6bc0636751', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a8a0d26a-4761-4676-922a-ba6bc0636751', 'content-type': 'application/x-amz-json-1.1', 'content-length': '145', 'date': 'Mon, 08 Apr 2024 00:44:23 GMT'}, 'RetryAttempts': 0}}


In [85]:
%%time

import boto3
import pprint
import pandas as pd

client = boto3.client("sagemaker", region)

ended = False
while not ended:
    inference_recommender_job = client.describe_inference_recommendations_job(JobName=str(default_job))
    if inference_recommender_job["Status"] in ["COMPLETED", "STOPPED", "FAILED"]:
        ended = True
    else:
        print("Inference recommender job in progress")
        time.sleep(60)

if inference_recommender_job["Status"] == "FAILED":
    print("Inference recommender job failed ")
    print("Failed Reason: {}".inference_recommender_job["FailedReason"])
else:
    print("Inference recommender job completed")

Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference recommender job in progress
Inference re

AttributeError: 'str' object has no attribute 'inference_recommender_job'

In [42]:
# https://aws.amazon.com/ec2/instance-types/

predictor = model.deploy(
    initial_instance_count=1,
    # instance_type="ml.g4dn.xlarge",
    instance_type="ml.g4dn.2xlarge",
    endpoint_name=model_name
)
pprint(predictor)

--------!<sagemaker.huggingface.model.HuggingFacePredictor object at 0x7f50fa0c3f80>


In [45]:
input_data = {
  "inputs": "fix grammar: I goes to the store",
  "parameters": {
    "do_sample": True,
    "max_new_tokens": 100,
    "temperature": 1,
    # "watermark": True
  }
}

response = predictor.predict(input_data)
pprint(response)

[{'generated_text': 'I will go to the store.'}]


In [75]:
import boto3
import pandas as pd

client = boto3.client("sagemaker", region)

list_model_metadata_response = client.list_model_metadata(MaxResults=50)

domains = []
frameworks = []
framework_versions = []
tasks = []
models = []

for model_summary in list_model_metadata_response["ModelMetadataSummaries"]:
    domains.append(model_summary["Domain"])
    tasks.append(model_summary["Task"])
    models.append(model_summary["Model"])
    frameworks.append(model_summary["Framework"])
    framework_versions.append(model_summary["FrameworkVersion"])

data = {
    "Domain": domains,
    "Task": tasks,
    "Framework": frameworks,
    "FrameworkVersion": framework_versions,
    "Model": models,
}

df = pd.DataFrame(data)

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 1000)
pd.set_option("display.colheader_justify", "center")
pd.set_option("display.precision", 3)


display(df.sort_values(by=["Domain", "Task", "Framework", "FrameworkVersion"]))

Unnamed: 0,Domain,Task,Framework,FrameworkVersion,Model
9,COMPUTER_VISION,IMAGE_CLASSIFICATION,MXNET,1.8.0,densenet201-gluon
10,COMPUTER_VISION,IMAGE_CLASSIFICATION,MXNET,1.8.0,resnet18v2-gluon
14,COMPUTER_VISION,IMAGE_CLASSIFICATION,PYTORCH,1.6.0,resnet152
0,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,efficientnetb7
4,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,nasnetlarge
5,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,vgg16
6,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,inception-v3
11,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,xception
12,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,densenet201
17,COMPUTER_VISION,IMAGE_CLASSIFICATION,TENSORFLOW,1.15.5,xceptionV1-keras
