In [2]:
# Seamless LLM Deployment with AWS SageMaker and Hugging Face
#https://medium.com/@nwenwehtaywin/seamless-llm-deployment-with-aws-sagemaker-and-hugging-face-3bffb3e4a596

!pip install sagemaker --upgrade


Collecting sagemaker
  Downloading sagemaker-2.252.0-py3-none-any.whl.metadata (17 kB)
Collecting attrs<26,>=24 (from sagemaker)
  Downloading attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting boto3<2.0,>=1.39.5 (from sagemaker)
  Downloading boto3-1.40.42-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<1.41.0,>=1.40.42 (from boto3<2.0,>=1.39.5->sagemaker)
  Downloading botocore-1.40.42-py3-none-any.whl.metadata (5.7 kB)
Collecting s3transfer<0.15.0,>=0.14.0 (from boto3<2.0,>=1.39.5->sagemaker)
  Downloading s3transfer-0.14.0-py3-none-any.whl.metadata (1.7 kB)
Downloading sagemaker-2.252.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m105.6 MB/s[0m  [33m0:00:00[0m
[?25hDownloading attrs-25.3.0-py3-none-any.whl (63 kB)
Downloading boto3-1.40.42-py3-none-any.whl (139 kB)
Downloading botocore-1.40.42-py3-none-any.whl (14.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31

In [4]:
import sagemaker
import boto3
from sagemaker.huggingface.model import HuggingFaceModel
import time


# Name of the IAM role to look up when the SDK cannot determine an execution
# role on its own. Change this to match the role created in your account.
FALLBACK_ROLE_NAME = "sagemaker_execution_role"

# Resolve the ARN of the IAM role that is passed to every HuggingFaceModel
# in the cells below.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError (presumably because the notebook
    # is not running with a SageMaker execution role -- confirm for your
    # setup), so fetch the role ARN from IAM by name instead.
    iam = boto3.client("iam")
    role = iam.get_role(RoleName=FALLBACK_ROLE_NAME)["Role"]["Arn"]

In [6]:
# Task 1: Question-Answering

# Start the wall-clock timer. The elapsed time printed at the end of this
# cell covers model creation, endpoint deployment and the first prediction.
start_setup = time.time()

# Settings handed to the model through `env`: the Hugging Face Hub model to
# load and the pipeline task it should serve.
hub = {
    'HF_MODEL_ID': 'distilbert-base-cased-distilled-squad',
    'HF_TASK': 'question-answering'
}

# Create the HuggingFaceModel instance using the role resolved earlier in
# the notebook and the framework versions of the serving container.
huggingface_model = HuggingFaceModel(
    env=hub,
    role=role,
    transformers_version='4.26',
    pytorch_version='1.13',
    py_version='py39'
)

# Deploy the model to a single ml.m5.large instance.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large'
)

try:
    # Question-answering input: a question plus the context to search.
    data = {
        "inputs": {
            "question": "What is used for inference?",
            "context": "This model is used with SageMaker to answer the questions."
        }
    }

    # Perform prediction using the deployed model and show the answer.
    result = predictor.predict(data)
    print(result)

    setup_time = time.time() - start_setup
    print(f"Model setup time: {setup_time:.2f} seconds")
finally:
    # The next task cell rebinds `predictor`, which would otherwise leave
    # this endpoint running (and billed) with no handle to it. Tear it down
    # even if the prediction above failed.
    predictor.delete_model()
    predictor.delete_endpoint()

-----!{'score': 0.13097743690013885, 'start': 24, 'end': 33, 'answer': 'SageMaker'}


In [9]:
# Task 2: Text Classification (sentiment analysis)

# Start the wall-clock timer. The elapsed time printed at the end of this
# cell covers model creation, endpoint deployment and the first prediction.
start_setup = time.time()

# `role` is reused from the role-resolution cell near the top of the
# notebook; calling sagemaker.get_execution_role() again here would bypass
# that cell's IAM fallback.
hub = {
    'HF_MODEL_ID': 'distilbert-base-uncased-finetuned-sst-2-english',
    'HF_TASK': 'text-classification'
}

# Create the HuggingFaceModel instance.
huggingface_model = HuggingFaceModel(
    env=hub,
    role=role,
    transformers_version='4.26',
    pytorch_version='1.13',
    py_version='py39'
)

# Deploy the model to a single ml.m5.large instance.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large'
)

try:
    # Input data for text classification.
    data = {
        "inputs": "I love this movie, it is awesome!"
    }

    # Perform prediction and show the predicted label with its score.
    result = predictor.predict(data)
    print(result)

    setup_time = time.time() - start_setup
    print(f"Model setup time: {setup_time:.2f} seconds")
finally:
    # The next task cell rebinds `predictor`, which would otherwise leave
    # this endpoint running (and billed) with no handle to it. Tear it down
    # even if the prediction above failed.
    predictor.delete_model()
    predictor.delete_endpoint()

------![{'label': 'POSITIVE', 'score': 0.999879002571106}]


In [10]:
# Task 3: Text Generation

# Start the wall-clock timer. The elapsed time printed at the end of this
# cell covers model creation, endpoint deployment and the first prediction.
start_setup = time.time()

# HuggingFaceModel is already imported in the imports cell at the top of the
# notebook, so it is not imported again here.
hub = {
    'HF_MODEL_ID': 'distilgpt2',
    'HF_TASK': 'text-generation'
}

# Reuse the execution role resolved at the top of the notebook instead of a
# hard-coded ARN: the literal that used to be here had no account id and a
# truncated role name, and the other task cells already use `role`.
role_id = role

# Create the HuggingFaceModel instance.
huggingface_model = HuggingFaceModel(
    env=hub,
    role=role_id,
    transformers_version='4.26',
    pytorch_version='1.13',
    py_version='py39'
)

# Deploy the model to a single ml.m5.large instance.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large'
)

try:
    # Prompt that the model should continue.
    data = {
        "inputs": "The future of AI is"
    }

    # Perform prediction and show the generated continuation.
    result = predictor.predict(data)
    print(result)

    setup_time = time.time() - start_setup
    print(f"Model setup time: {setup_time:.2f} seconds")
finally:
    # Nothing else in this cell needs the endpoint once the result has been
    # printed; delete the model and endpoint so they do not keep running
    # (and being billed) after the demo, even if the prediction failed.
    predictor.delete_model()
    predictor.delete_endpoint()

------![{'generated_text': "The future of AI is in question. AI is an essential tool in helping us better understand why people are so interested in the world. When we think about the things that we're doing, we also think of things we might want to do, and"}]
