In [1]:
!pip install --upgrade boto3 sagemaker

Collecting boto3
  Downloading boto3-1.34.131-py3-none-any.whl.metadata (6.6 kB)
Collecting sagemaker
  Downloading sagemaker-2.224.1-py3-none-any.whl.metadata (15 kB)
Collecting botocore<1.35.0,>=1.34.131 (from boto3)
  Downloading botocore-1.34.131-py3-none-any.whl.metadata (5.7 kB)
Downloading boto3-1.34.131-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sagemaker-2.224.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading botocore-1.34.131-py3-none-any.whl (12.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: botocore, boto3, sagemaker
  Attempting uninstall: botocore
    Found existing installation: botocore 1.34.1

In [2]:
import sagemaker
import boto3
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::765477734195:role/AmazonSageMaker-ExecutionRole
sagemaker session region: us-east-2


In [3]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# retrieve the llm image uri
llm_image = get_huggingface_llm_image_uri(
  "huggingface",
  version="0.8.2"
)

# print ecr image uri
print(f"llm image uri: {llm_image}")

llm image uri: 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04


In [10]:
import json
from sagemaker.huggingface import HuggingFaceModel

# sagemaker config
instance_type = "ml.m5.2xlarge"
number_of_gpu = 4

# TGI config
config = {
  'HF_MODEL_ID': "mistralai/Mistral-7B-v0.1", # model id from hf.co/models
  'SM_NUM_GPUS': json.dumps(number_of_gpu), # Number of GPU used per replica
  'MAX_INPUT_LENGTH': json.dumps(6000),  # Max length of input text
  'MAX_TOTAL_TOKENS': json.dumps(2048),  # Max length of the generation (including input text)
  # 'HF_MODEL_QUANTIZE': "bitsandbytes", # comment in to quantize
}

# create HuggingFaceModel
llm_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env=config
)

In [None]:
# Deploy model to an endpoint

llm = llm_model.deploy(
  initial_instance_count=1,
  instance_type=instance_type,
  # volume_size=400, # If using an instance with local SSD storage, volume_size must be None, e.g. p4 but not p3
)

-------------------------------

In [None]:
# define payload
prompt = """You are an helpful Assistant. Knowing everyting about AWS.

User: Can you tell me something about Amazon SageMaker?
Answer:"""

# hyperparameters for llm
payload = {
  "inputs": prompt,
  "parameters": {
    "do_sample": True,
    "top_p": 0.9,
    "temperature": 0.8,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.03,
    "stop": ["\nUser:","<|endoftext|>","</s>"]
  }
}

# send request to endpoint
response = llm.predict(payload)

for seq in response:
    print(f"Result: {seq['generated_text']}")

In [None]:
llm.delete_model()
llm.delete_endpoint()

In [None]:
# import json
# import sagemaker
# import boto3
# from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# try:
# 	role = sagemaker.get_execution_role()
# except ValueError:
# 	iam = boto3.client('iam')
# 	role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# # Hub Model configuration. https://huggingface.co/models
# hub = {
# 	'HF_MODEL_ID':'mistralai/Mistral-7B-v0.1',
# 	'SM_NUM_GPUS': json.dumps(8),
# 	'HUGGING_FACE_HUB_TOKEN': '<REPLACE WITH YOUR TOKEN>'
# }

# assert hub['HUGGING_FACE_HUB_TOKEN'] != '<REPLACE WITH YOUR TOKEN>', "You have to provide a token."

# # create Hugging Face Model Class
# huggingface_model = HuggingFaceModel(
# 	image_uri=get_huggingface_llm_image_uri("huggingface",version="2.0.2"),
# 	env=hub,
# 	role=role, 
# )

# # deploy model to SageMaker Inference
# predictor = huggingface_model.deploy(
# 	initial_instance_count=1,
# 	instance_type="ml.p5.48xlarge",
# 	container_startup_health_check_timeout=2100,
#   )
  
# # send request
# predictor.predict({
# 	"inputs": "My name is Julien and I like to",
# })